diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-166dc24b2f73b0541a14815081ee4c8d9ea.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-166dc24b2f73b0541a14815081ee4c8d9ea.patch deleted file mode 100644 index 8845a280..00000000 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-166dc24b2f73b0541a14815081ee4c8d9ea.patch +++ /dev/null @@ -1,8834 +0,0 @@ -From 70a24464222d56067cc610d54f66d438051e81a7 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 7 Mar 2024 10:40:41 +1100 -Subject: [PATCH] Updated vkd3d to 166dc24b2f73b0541a14815081ee4c8d9eab3269. - ---- - libs/vkd3d/include/private/vkd3d_common.h | 181 +- - libs/vkd3d/include/private/vkd3d_memory.h | 2 +- - libs/vkd3d/include/vkd3d_shader.h | 36 +- - libs/vkd3d/include/vkd3d_types.h | 2 + - libs/vkd3d/libs/vkd3d-common/blob.c | 3 +- - libs/vkd3d/libs/vkd3d-common/debug.c | 2 +- - libs/vkd3d/libs/vkd3d-common/error.c | 1 - - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 471 +++-- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 54 +- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 39 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 499 ++++- - libs/vkd3d/libs/vkd3d-shader/fx.c | 361 +++- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 88 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 28 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 11 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 6 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 128 +- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 346 +++- - libs/vkd3d/libs/vkd3d-shader/ir.c | 1653 ++++++++++++++--- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 299 ++- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 49 +- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 273 ++- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 67 +- - libs/vkd3d/libs/vkd3d/device.c | 365 ++-- - libs/vkd3d/libs/vkd3d/resource.c | 9 +- - libs/vkd3d/libs/vkd3d/vkd3d_main.c | 4 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 82 +- - 27 files changed, 3972 insertions(+), 1087 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h 
b/libs/vkd3d/include/private/vkd3d_common.h -index 6a3b530d868..974ff9446db 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -30,6 +30,9 @@ - #include - #include - #include -+#ifndef _WIN32 -+#include -+#endif - - #ifdef _MSC_VER - #include -@@ -105,11 +108,130 @@ VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsig - #define vkd3d_unreachable() vkd3d_unreachable_(__FILE__, __LINE__) - #endif - -+#ifdef VKD3D_NO_TRACE_MESSAGES -+#define TRACE(args...) do { } while (0) -+#define TRACE_ON() (false) -+#endif -+ -+#ifdef VKD3D_NO_DEBUG_MESSAGES -+#define WARN(args...) do { } while (0) -+#define FIXME(args...) do { } while (0) -+#endif -+ -+enum vkd3d_dbg_level -+{ -+ VKD3D_DBG_LEVEL_NONE, -+ VKD3D_DBG_LEVEL_ERR, -+ VKD3D_DBG_LEVEL_FIXME, -+ VKD3D_DBG_LEVEL_WARN, -+ VKD3D_DBG_LEVEL_TRACE, -+}; -+ -+enum vkd3d_dbg_level vkd3d_dbg_get_level(void); -+ -+void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4); -+void vkd3d_dbg_set_log_callback(PFN_vkd3d_log callback); -+ -+const char *vkd3d_dbg_sprintf(const char *fmt, ...) VKD3D_PRINTF_FUNC(1, 2); -+const char *vkd3d_dbg_vsprintf(const char *fmt, va_list args); -+const char *debugstr_a(const char *str); -+const char *debugstr_an(const char *str, size_t n); -+const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); -+ -+#define VKD3D_DBG_LOG(level) \ -+ do { \ -+ const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ -+ VKD3D_DBG_PRINTF -+ -+#define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ -+ do { \ -+ static bool vkd3d_dbg_next_time; \ -+ const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ -+ ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ -+ vkd3d_dbg_next_time = true; \ -+ VKD3D_DBG_PRINTF -+ -+#define VKD3D_DBG_PRINTF(...) 
\ -+ vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) -+ -+#ifndef TRACE -+#define TRACE VKD3D_DBG_LOG(TRACE) -+#endif -+ -+#ifndef WARN -+#define WARN VKD3D_DBG_LOG(WARN) -+#endif -+ -+#ifndef FIXME -+#define FIXME VKD3D_DBG_LOG(FIXME) -+#endif -+ -+#define ERR VKD3D_DBG_LOG(ERR) -+ -+#ifndef TRACE_ON -+#define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) -+#endif -+ -+#ifndef WARN_ON -+#define WARN_ON() (vkd3d_dbg_get_level() >= VKD3D_DBG_LEVEL_WARN) -+#endif -+ -+#define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) -+ -+#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name -+ -+static inline const char *debugstr_guid(const GUID *guid) -+{ -+ if (!guid) -+ return "(null)"; -+ -+ return vkd3d_dbg_sprintf("{%08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x}", -+ (unsigned long)guid->Data1, guid->Data2, guid->Data3, guid->Data4[0], -+ guid->Data4[1], guid->Data4[2], guid->Data4[3], guid->Data4[4], -+ guid->Data4[5], guid->Data4[6], guid->Data4[7]); -+} -+ -+static inline const char *debugstr_hresult(HRESULT hr) -+{ -+ switch (hr) -+ { -+#define TO_STR(u) case u: return #u; -+ TO_STR(S_OK) -+ TO_STR(S_FALSE) -+ TO_STR(E_NOTIMPL) -+ TO_STR(E_NOINTERFACE) -+ TO_STR(E_POINTER) -+ TO_STR(E_ABORT) -+ TO_STR(E_FAIL) -+ TO_STR(E_OUTOFMEMORY) -+ TO_STR(E_INVALIDARG) -+ TO_STR(DXGI_ERROR_NOT_FOUND) -+ TO_STR(DXGI_ERROR_MORE_DATA) -+ TO_STR(DXGI_ERROR_UNSUPPORTED) -+#undef TO_STR -+ default: -+ return vkd3d_dbg_sprintf("%#x", (int)hr); -+ } -+} -+ -+unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value); -+ -+struct vkd3d_debug_option -+{ -+ const char *name; -+ uint64_t flag; -+}; -+ -+bool vkd3d_debug_list_has_member(const char *string, const char *member); -+uint64_t vkd3d_parse_debug_options(const char *string, -+ const struct vkd3d_debug_option *options, unsigned int option_count); -+void vkd3d_set_thread_name(const char *name); -+ - static inline unsigned int vkd3d_popcount(unsigned 
int v) - { - #ifdef _MSC_VER - return __popcnt(v); --#elif defined(__MINGW32__) -+#elif defined(HAVE_BUILTIN_POPCOUNT) - return __builtin_popcount(v); - #else - v -= (v >> 1) & 0x55555555; -@@ -305,6 +427,63 @@ static inline uint32_t vkd3d_atomic_increment_u32(uint32_t volatile *x) - return vkd3d_atomic_add_fetch_u32(x, 1); - } - -+struct vkd3d_mutex -+{ -+#ifdef _WIN32 -+ CRITICAL_SECTION lock; -+#else -+ pthread_mutex_t lock; -+#endif -+}; -+ -+static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) -+{ -+#ifdef _WIN32 -+ InitializeCriticalSection(&lock->lock); -+#else -+ int ret; -+ -+ if ((ret = pthread_mutex_init(&lock->lock, NULL))) -+ ERR("Failed to initialise the mutex, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) -+{ -+#ifdef _WIN32 -+ EnterCriticalSection(&lock->lock); -+#else -+ int ret; -+ -+ if ((ret = pthread_mutex_lock(&lock->lock))) -+ ERR("Failed to lock the mutex, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) -+{ -+#ifdef _WIN32 -+ LeaveCriticalSection(&lock->lock); -+#else -+ int ret; -+ -+ if ((ret = pthread_mutex_unlock(&lock->lock))) -+ ERR("Failed to unlock the mutex, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) -+{ -+#ifdef _WIN32 -+ DeleteCriticalSection(&lock->lock); -+#else -+ int ret; -+ -+ if ((ret = pthread_mutex_destroy(&lock->lock))) -+ ERR("Failed to destroy the mutex, ret %d.\n", ret); -+#endif -+} -+ - static inline void vkd3d_parse_version(const char *version, int *major, int *minor) - { - *major = atoi(version); -diff --git a/libs/vkd3d/include/private/vkd3d_memory.h b/libs/vkd3d/include/private/vkd3d_memory.h -index 8a2edb1000d..bb177e39add 100644 ---- a/libs/vkd3d/include/private/vkd3d_memory.h -+++ b/libs/vkd3d/include/private/vkd3d_memory.h -@@ -24,7 +24,7 @@ - #include - #include - --#include "vkd3d_debug.h" -+#include "vkd3d_common.h" - - static inline void 
*vkd3d_malloc(size_t size) - { -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 9e663919c38..83b90474af4 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -148,6 +148,12 @@ enum vkd3d_shader_compile_option_formatting_flags - VKD3D_SHADER_COMPILE_OPTION_FORMATTING_OFFSETS = 0x00000004, - VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER = 0x00000008, - VKD3D_SHADER_COMPILE_OPTION_FORMATTING_RAW_IDS = 0x00000010, -+ /** -+ * Emit the signatures when disassembling a shader. -+ * -+ * \since 1.12 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES = 0x00000020, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), - }; -@@ -212,6 +218,20 @@ enum vkd3d_shader_compile_option_feature_flags - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLAGS), - }; - -+/** -+ * Flags for vkd3d_shader_parse_dxbc(). -+ * -+ * \since 1.12 -+ */ -+enum vkd3d_shader_parse_dxbc_flags -+{ -+ /** Ignore the checksum and continue parsing even if it is -+ * incorrect. */ -+ VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM = 0x00000001, -+ -+ VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARSE_DXBC_FLAGS), -+}; -+ - enum vkd3d_shader_compile_option_name - { - /** -@@ -279,6 +299,15 @@ enum vkd3d_shader_compile_option_name - * \since 1.11 - */ - VKD3D_SHADER_COMPILE_OPTION_FEATURE = 0x0000000a, -+ /** -+ * If \a value is non-zero compilation will produce a child effect using -+ * shared object descriptions, as instructed by the "shared" modifier. -+ * Child effects are supported with fx_2_0, fx_4_0, and fx_4_1. This option -+ * and "shared" modifiers are ignored for fx_5_0 profile, and non-fx profiles. 
-+ * -+ * \since 1.12 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT = 0x0000000b, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), - }; -@@ -886,6 +915,8 @@ enum vkd3d_shader_spirv_extension - VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT, - /** \since 1.11 */ - VKD3D_SHADER_SPIRV_EXTENSION_EXT_VIEWPORT_INDEX_LAYER, -+ /** \since 1.12 */ -+ VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_EXTENSION), - }; -@@ -2377,9 +2408,8 @@ VKD3D_SHADER_API void vkd3d_shader_free_dxbc(struct vkd3d_shader_dxbc_desc *dxbc - * - * \param dxbc A vkd3d_shader_code structure containing the DXBC blob to parse. - * -- * \param flags A set of flags modifying the behaviour of the function. No -- * flags are defined for this version of vkd3d-shader, and this parameter -- * should be set to 0. -+ * \param flags A combination of zero or more elements of enum -+ * vkd3d_shader_parse_dxbc_flags. - * - * \param desc A vkd3d_shader_dxbc_desc structure describing the contents of - * the DXBC blob. Its vkd3d_shader_dxbc_section_desc structures will contain -diff --git a/libs/vkd3d/include/vkd3d_types.h b/libs/vkd3d/include/vkd3d_types.h -index 12ceef42fc4..017eaf11806 100644 ---- a/libs/vkd3d/include/vkd3d_types.h -+++ b/libs/vkd3d/include/vkd3d_types.h -@@ -41,6 +41,8 @@ enum vkd3d_result - { - /** Success. */ - VKD3D_OK = 0, -+ /** Success as a result of there being nothing to do. */ -+ VKD3D_FALSE = 1, - /** An unspecified failure occurred. */ - VKD3D_ERROR = -1, - /** There are not enough resources available to complete the operation. 
*/ -diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c -index 06a12ef5bc4..6bc95dc55c4 100644 ---- a/libs/vkd3d/libs/vkd3d-common/blob.c -+++ b/libs/vkd3d/libs/vkd3d-common/blob.c -@@ -17,11 +17,12 @@ - */ - - #define COBJMACROS -+ - #define CONST_VTABLE - #include "vkd3d.h" - #include "vkd3d_blob.h" --#include "vkd3d_debug.h" - #include "vkd3d_memory.h" -+#include "d3d12shader.h" - - struct vkd3d_blob - { -diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c -index e12cd39450a..4523fc997ef 100644 ---- a/libs/vkd3d/libs/vkd3d-common/debug.c -+++ b/libs/vkd3d/libs/vkd3d-common/debug.c -@@ -20,7 +20,7 @@ - # define _WIN32_WINNT 0x0600 /* For InitOnceExecuteOnce(). */ - #endif - --#include "vkd3d_debug.h" -+#include "vkd3d_common.h" - - #include - #include -diff --git a/libs/vkd3d/libs/vkd3d-common/error.c b/libs/vkd3d/libs/vkd3d-common/error.c -index 3572669ac1c..b8350a5404c 100644 ---- a/libs/vkd3d/libs/vkd3d-common/error.c -+++ b/libs/vkd3d/libs/vkd3d-common/error.c -@@ -17,7 +17,6 @@ - */ - - #include "vkd3d_common.h" --#include "vkd3d_debug.h" - - HRESULT hresult_from_vkd3d_result(int vkd3d_result) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 3f86bd45960..0623a129eae 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -250,6 +250,7 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_NOT ] = "not", - [VKD3DSIH_NRM ] = "nrm", - [VKD3DSIH_OR ] = "or", -+ [VKD3DSIH_ORD ] = "ord", - [VKD3DSIH_PHASE ] = "phase", - [VKD3DSIH_PHI ] = "phi", - [VKD3DSIH_POW ] = "pow", -@@ -321,6 +322,7 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_UMAX ] = "umax", - [VKD3DSIH_UMIN ] = "umin", - [VKD3DSIH_UMUL ] = "umul", -+ [VKD3DSIH_UNO ] = "uno", - [VKD3DSIH_USHR ] = "ushr", - [VKD3DSIH_UTOD ] = "utod", - [VKD3DSIH_UTOF ] = "utof", -@@ -370,6 +372,7 @@ struct 
vkd3d_d3d_asm_colours - const char *swizzle; - const char *version; - const char *write_mask; -+ const char *label; - }; - - struct vkd3d_d3d_asm_compiler -@@ -377,7 +380,7 @@ struct vkd3d_d3d_asm_compiler - struct vkd3d_string_buffer buffer; - struct vkd3d_shader_version shader_version; - struct vkd3d_d3d_asm_colours colours; -- enum vsir_asm_dialect dialect; -+ enum vsir_asm_flags flags; - const struct vkd3d_shader_instruction *current; - }; - -@@ -511,79 +514,88 @@ static void shader_dump_uav_flags(struct vkd3d_d3d_asm_compiler *compiler, uint3 - vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", uav_flags); - } - --static void shader_dump_tessellator_domain(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_tessellator_domain domain) -+static void shader_print_tessellator_domain(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, enum vkd3d_tessellator_domain d, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ const char *domain; - -- shader_addline(buffer, "domain_"); -- switch (domain) -+ switch (d) - { - case VKD3D_TESSELLATOR_DOMAIN_LINE: -- shader_addline(buffer, "isoline"); -+ domain = "domain_isoline"; - break; - case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -- shader_addline(buffer, "tri"); -+ domain = "domain_tri"; - break; - case VKD3D_TESSELLATOR_DOMAIN_QUAD: -- shader_addline(buffer, "quad"); -+ domain = "domain_quad"; - break; - default: -- shader_addline(buffer, "unknown_tessellator_domain(%#x)", domain); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, d, compiler->colours.reset, suffix); -+ return; - } -+ -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, domain, suffix); - } - --static void shader_dump_tessellator_output_primitive(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_shader_tessellator_output_primitive output_primitive) -+static void shader_print_tessellator_output_primitive(struct vkd3d_d3d_asm_compiler 
*compiler, -+ const char *prefix, enum vkd3d_shader_tessellator_output_primitive p, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ const char *primitive; - -- shader_addline(buffer, "output_"); -- switch (output_primitive) -+ switch (p) - { - case VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT: -- shader_addline(buffer, "point"); -+ primitive = "output_point"; - break; - case VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE: -- shader_addline(buffer, "line"); -+ primitive = "output_line"; - break; - case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW: -- shader_addline(buffer, "triangle_cw"); -+ primitive = "output_triangle_cw"; - break; - case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW: -- shader_addline(buffer, "triangle_ccw"); -+ primitive = "output_triangle_ccw"; - break; - default: -- shader_addline(buffer, "unknown_tessellator_output_primitive(%#x)", output_primitive); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, p, compiler->colours.reset, suffix); -+ return; - } -+ -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive, suffix); - } - --static void shader_dump_tessellator_partitioning(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_shader_tessellator_partitioning partitioning) -+static void shader_print_tessellator_partitioning(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, enum vkd3d_shader_tessellator_partitioning p, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ const char *partitioning; - -- shader_addline(buffer, "partitioning_"); -- switch (partitioning) -+ switch (p) - { - case VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER: -- shader_addline(buffer, "integer"); -+ partitioning = "partitioning_integer"; - break; - case VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2: -- shader_addline(buffer, "pow2"); -+ partitioning = "partitioning_pow2"; - break; - case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: -- 
shader_addline(buffer, "fractional_odd"); -+ partitioning = "partitioning_fractional_odd"; - break; - case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: -- shader_addline(buffer, "fractional_even"); -+ partitioning = "partitioning_fractional_even"; - break; - default: -- shader_addline(buffer, "unknown_tessellator_partitioning(%#x)", partitioning); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, p, compiler->colours.reset, suffix); -+ return; - } -+ -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, partitioning, suffix); - } - - static void shader_dump_shader_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, -@@ -646,6 +658,8 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum - [VKD3D_DATA_UINT8 ] = "uint8", - [VKD3D_DATA_UINT64 ] = "uint64", - [VKD3D_DATA_BOOL ] = "bool", -+ [VKD3D_DATA_UINT16 ] = "uint16", -+ [VKD3D_DATA_HALF ] = "half", - }; - - const char *name; -@@ -793,8 +807,8 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, - } - } - --static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_src_param *param); -+static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_src_param *param, const char *suffix); - - static void shader_print_float_literal(struct vkd3d_d3d_asm_compiler *compiler, - const char *prefix, float f, const char *suffix) -@@ -891,13 +905,9 @@ static void shader_print_untyped_literal(struct vkd3d_d3d_asm_compiler *compiler - static void shader_print_subscript(struct vkd3d_d3d_asm_compiler *compiler, - unsigned int offset, const struct vkd3d_shader_src_param *rel_addr) - { -- vkd3d_string_buffer_printf(&compiler->buffer, "["); - if (rel_addr) -- { -- shader_dump_src_param(compiler, rel_addr); -- vkd3d_string_buffer_printf(&compiler->buffer, " + "); -- } -- shader_print_uint_literal(compiler, 
"", offset, "]"); -+ shader_print_src_param(compiler, "[", rel_addr, " + "); -+ shader_print_uint_literal(compiler, rel_addr ? "" : "[", offset, "]"); - } - - static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler, -@@ -920,7 +930,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"}; - static const char * const misctype_reg_names[] = {"vPos", "vFace"}; - -- shader_addline(buffer, "%s", compiler->colours.reg); -+ shader_addline(buffer, "%s", reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg); - switch (reg->type) - { - case VKD3DSPR_TEMP: -@@ -1370,7 +1380,10 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, - struct vkd3d_string_buffer *buffer = &compiler->buffer; - const char *dimension; - -- if (compiler->dialect != VSIR_ASM_VSIR) -+ if (!(compiler->flags & VSIR_ASM_FLAG_DUMP_TYPES)) -+ return; -+ -+ if (reg->data_type == VKD3D_DATA_UNUSED) - return; - - if (reg->dimension < ARRAY_SIZE(dimensions)) -@@ -1383,78 +1396,110 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, - shader_addline(buffer, ">"); - } - --static void shader_dump_dst_param(struct vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_dst_param *param, bool is_declaration) -+static void shader_print_write_mask(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, uint32_t mask, const char *suffix) -+{ -+ unsigned int i = 0; -+ char buffer[5]; -+ -+ if (mask == 0) -+ { -+ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s", prefix, suffix); -+ return; -+ } -+ -+ if (mask & VKD3DSP_WRITEMASK_0) -+ buffer[i++] = 'x'; -+ if (mask & VKD3DSP_WRITEMASK_1) -+ buffer[i++] = 'y'; -+ if (mask & VKD3DSP_WRITEMASK_2) -+ buffer[i++] = 'z'; -+ if (mask & VKD3DSP_WRITEMASK_3) -+ buffer[i++] = 'w'; -+ buffer[i++] = '\0'; -+ -+ vkd3d_string_buffer_printf(&compiler->buffer, 
"%s.%s%s%s%s", prefix, -+ compiler->colours.write_mask, buffer, compiler->colours.reset, suffix); -+} -+ -+static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_dst_param *param, bool is_declaration, const char *suffix) - { -- struct vkd3d_string_buffer *buffer = &compiler->buffer; - uint32_t write_mask = param->write_mask; - -+ vkd3d_string_buffer_printf(&compiler->buffer, "%s", prefix); - shader_dump_register(compiler, ¶m->reg, is_declaration); - - if (write_mask && param->reg.dimension == VSIR_DIMENSION_VEC4) - { -- static const char write_mask_chars[] = "xyzw"; -- - if (data_type_is_64_bit(param->reg.data_type)) - write_mask = vsir_write_mask_32_from_64(write_mask); - -- shader_addline(buffer, ".%s", compiler->colours.write_mask); -- if (write_mask & VKD3DSP_WRITEMASK_0) -- shader_addline(buffer, "%c", write_mask_chars[0]); -- if (write_mask & VKD3DSP_WRITEMASK_1) -- shader_addline(buffer, "%c", write_mask_chars[1]); -- if (write_mask & VKD3DSP_WRITEMASK_2) -- shader_addline(buffer, "%c", write_mask_chars[2]); -- if (write_mask & VKD3DSP_WRITEMASK_3) -- shader_addline(buffer, "%c", write_mask_chars[3]); -- shader_addline(buffer, "%s", compiler->colours.reset); -+ shader_print_write_mask(compiler, "", write_mask, ""); - } - - shader_print_precision(compiler, ¶m->reg); - shader_print_non_uniform(compiler, ¶m->reg); - shader_dump_reg_type(compiler, ¶m->reg); -+ vkd3d_string_buffer_printf(&compiler->buffer, "%s", suffix); - } - --static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_src_param *param) -+static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_src_param *param, const char *suffix) - { - enum vkd3d_shader_src_modifier src_modifier = param->modifiers; - struct vkd3d_string_buffer *buffer = &compiler->buffer; - uint32_t swizzle = param->swizzle; -+ const char 
*modifier = ""; - - if (src_modifier == VKD3DSPSM_NEG - || src_modifier == VKD3DSPSM_BIASNEG - || src_modifier == VKD3DSPSM_SIGNNEG - || src_modifier == VKD3DSPSM_X2NEG - || src_modifier == VKD3DSPSM_ABSNEG) -- shader_addline(buffer, "-"); -+ modifier = "-"; - else if (src_modifier == VKD3DSPSM_COMP) -- shader_addline(buffer, "1-"); -+ modifier = "1-"; - else if (src_modifier == VKD3DSPSM_NOT) -- shader_addline(buffer, "!"); -+ modifier = "!"; -+ vkd3d_string_buffer_printf(buffer, "%s%s", prefix, modifier); - - if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) -- shader_addline(buffer, "|"); -+ vkd3d_string_buffer_printf(buffer, "|"); - - shader_dump_register(compiler, ¶m->reg, false); - - switch (src_modifier) - { -- case VKD3DSPSM_NONE: break; -- case VKD3DSPSM_NEG: break; -- case VKD3DSPSM_NOT: break; -- case VKD3DSPSM_BIAS: shader_addline(buffer, "_bias"); break; -- case VKD3DSPSM_BIASNEG: shader_addline(buffer, "_bias"); break; -- case VKD3DSPSM_SIGN: shader_addline(buffer, "_bx2"); break; -- case VKD3DSPSM_SIGNNEG: shader_addline(buffer, "_bx2"); break; -- case VKD3DSPSM_COMP: break; -- case VKD3DSPSM_X2: shader_addline(buffer, "_x2"); break; -- case VKD3DSPSM_X2NEG: shader_addline(buffer, "_x2"); break; -- case VKD3DSPSM_DZ: shader_addline(buffer, "_dz"); break; -- case VKD3DSPSM_DW: shader_addline(buffer, "_dw"); break; -+ case VKD3DSPSM_NONE: -+ case VKD3DSPSM_NEG: -+ case VKD3DSPSM_COMP: -+ case VKD3DSPSM_ABS: - case VKD3DSPSM_ABSNEG: -- case VKD3DSPSM_ABS: /* handled later */ break; -- default: shader_addline(buffer, "_unknown_modifier(%#x)", src_modifier); -+ case VKD3DSPSM_NOT: -+ break; -+ case VKD3DSPSM_BIAS: -+ case VKD3DSPSM_BIASNEG: -+ vkd3d_string_buffer_printf(buffer, "_bias"); -+ break; -+ case VKD3DSPSM_SIGN: -+ case VKD3DSPSM_SIGNNEG: -+ vkd3d_string_buffer_printf(buffer, "_bx2"); -+ break; -+ case VKD3DSPSM_X2: -+ case VKD3DSPSM_X2NEG: -+ vkd3d_string_buffer_printf(buffer, "_x2"); -+ break; -+ case VKD3DSPSM_DZ: -+ 
vkd3d_string_buffer_printf(buffer, "_dz"); -+ break; -+ case VKD3DSPSM_DW: -+ vkd3d_string_buffer_printf(buffer, "_dw"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_%s%s", -+ compiler->colours.error, src_modifier, compiler->colours.reset); -+ break; - } - - if (param->reg.type != VKD3DSPR_IMMCONST && param->reg.type != VKD3DSPR_IMMCONST64 -@@ -1472,26 +1517,22 @@ static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, - swizzle_z = vsir_swizzle_get_component(swizzle, 2); - swizzle_w = vsir_swizzle_get_component(swizzle, 3); - -- if (swizzle_x == swizzle_y -- && swizzle_x == swizzle_z -- && swizzle_x == swizzle_w) -- { -- shader_addline(buffer, ".%s%c%s", compiler->colours.swizzle, -+ if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) -+ vkd3d_string_buffer_printf(buffer, ".%s%c%s", compiler->colours.swizzle, - swizzle_chars[swizzle_x], compiler->colours.reset); -- } - else -- { -- shader_addline(buffer, ".%s%c%c%c%c%s", compiler->colours.swizzle, -+ vkd3d_string_buffer_printf(buffer, ".%s%c%c%c%c%s", compiler->colours.swizzle, - swizzle_chars[swizzle_x], swizzle_chars[swizzle_y], - swizzle_chars[swizzle_z], swizzle_chars[swizzle_w], compiler->colours.reset); -- } - } -+ - if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) -- shader_addline(buffer, "|"); -+ vkd3d_string_buffer_printf(buffer, "|"); - - shader_print_precision(compiler, ¶m->reg); - shader_print_non_uniform(compiler, ¶m->reg); - shader_dump_reg_type(compiler, ¶m->reg); -+ vkd3d_string_buffer_printf(buffer, "%s", suffix); - } - - static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, -@@ -1765,11 +1806,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - compiler->current = ins; - - if (ins->predicate) -- { -- vkd3d_string_buffer_printf(buffer, "("); -- shader_dump_src_param(compiler, ins->predicate); -- vkd3d_string_buffer_printf(buffer, ") "); -- } -+ 
shader_print_src_param(compiler, "(", ins->predicate, ") "); - - /* PixWin marks instructions with the coissue flag with a '+' */ - if (ins->coissue) -@@ -1823,8 +1860,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3DSIH_DCL_INDEX_RANGE: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.index_range.dst, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.index_range.dst, true, ""); - shader_print_uint_literal(compiler, " ", ins->declaration.index_range.register_count, ""); - break; - -@@ -1842,16 +1878,14 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - case VKD3DSIH_DCL_INPUT_PS: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_interpolation_mode(compiler, ins->flags); -- shader_addline(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.dst, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); - break; - - case VKD3DSIH_DCL_INPUT_PS_SGV: - case VKD3DSIH_DCL_INPUT_SGV: - case VKD3DSIH_DCL_INPUT_SIV: - case VKD3DSIH_DCL_OUTPUT_SIV: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); - shader_addline(buffer, ", "); - shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); - break; -@@ -1859,16 +1893,14 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - case VKD3DSIH_DCL_INPUT_PS_SIV: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_interpolation_mode(compiler, ins->flags); -- shader_addline(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); - shader_addline(buffer, ", "); - 
shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); - break; - - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.dst, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); - break; - - case VKD3DSIH_DCL_INPUT_PRIMITIVE: -@@ -1885,14 +1917,12 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3DSIH_DCL_RESOURCE_RAW: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.raw_resource.resource.reg, true, ""); - shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); - break; - - case VKD3DSIH_DCL_RESOURCE_STRUCTURED: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.structured_resource.resource.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.structured_resource.resource.reg, true, ""); - shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); - shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); - break; -@@ -1916,29 +1946,24 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_tessellator_domain(compiler, ins->declaration.tessellator_domain); -+ shader_print_tessellator_domain(compiler, " ", ins->declaration.tessellator_domain, ""); - break; - - case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_tessellator_output_primitive(compiler, ins->declaration.tessellator_output_primitive); -+ 
shader_print_tessellator_output_primitive(compiler, " ", ins->declaration.tessellator_output_primitive, ""); - break; - - case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_tessellator_partitioning(compiler, ins->declaration.tessellator_partitioning); -+ shader_print_tessellator_partitioning(compiler, " ", ins->declaration.tessellator_partitioning, ""); - break; - - case VKD3DSIH_DCL_TGSM_RAW: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.tgsm_raw.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.tgsm_raw.reg, true, ""); - shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_raw.byte_count, ""); - break; - - case VKD3DSIH_DCL_TGSM_STRUCTURED: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.tgsm_structured.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.tgsm_structured.reg, true, ""); - shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.byte_stride, ""); - shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.structure_count, ""); - break; -@@ -1951,15 +1976,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - - case VKD3DSIH_DCL_UAV_RAW: - shader_dump_uav_flags(compiler, ins->flags); -- shader_addline(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.raw_resource.resource.reg, true, ""); - shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); - break; - - case VKD3DSIH_DCL_UAV_STRUCTURED: - shader_dump_uav_flags(compiler, ins->flags); -- shader_addline(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.structured_resource.resource.reg, true); -+ shader_print_dst_param(compiler, " ", 
&ins->declaration.structured_resource.resource.reg, true, ""); - shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); - shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); - break; -@@ -2021,15 +2044,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - for (i = 0; i < ins->dst_count; ++i) - { - shader_dump_ins_modifiers(compiler, &ins->dst[i]); -- shader_addline(buffer, !i ? " " : ", "); -- shader_dump_dst_param(compiler, &ins->dst[i], false); -+ shader_print_dst_param(compiler, !i ? " " : ", ", &ins->dst[i], false, ""); - } - - /* Other source tokens */ - for (i = ins->dst_count; i < (ins->dst_count + ins->src_count); ++i) - { -- shader_addline(buffer, !i ? " " : ", "); -- shader_dump_src_param(compiler, &ins->src[i - ins->dst_count]); -+ shader_print_src_param(compiler, !i ? " " : ", ", &ins->src[i - ins->dst_count], ""); - } - break; - } -@@ -2037,21 +2058,186 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - shader_addline(buffer, "\n"); - } - --enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, -+static const char *get_sysval_semantic_name(enum vkd3d_shader_sysval_semantic semantic) -+{ -+ switch (semantic) -+ { -+ case VKD3D_SHADER_SV_NONE: return "NONE"; -+ case VKD3D_SHADER_SV_POSITION: return "POS"; -+ case VKD3D_SHADER_SV_CLIP_DISTANCE: return "CLIPDST"; -+ case VKD3D_SHADER_SV_CULL_DISTANCE: return "CULLDST"; -+ case VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX: return "RTINDEX"; -+ case VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX: return "VPINDEX"; -+ case VKD3D_SHADER_SV_VERTEX_ID: return "VERTID"; -+ case VKD3D_SHADER_SV_PRIMITIVE_ID: return "PRIMID"; -+ case VKD3D_SHADER_SV_INSTANCE_ID: return "INSTID"; -+ case VKD3D_SHADER_SV_IS_FRONT_FACE: return "FFACE"; -+ case VKD3D_SHADER_SV_SAMPLE_INDEX: return "SAMPLE"; -+ case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: return "QUADEDGE"; -+ 
case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: return "QUADINT"; -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: return "TRIEDGE"; -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: return "TRIINT"; -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: return "LINEDET"; -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: return "LINEDEN"; -+ case VKD3D_SHADER_SV_TARGET: return "TARGET"; -+ case VKD3D_SHADER_SV_DEPTH: return "DEPTH"; -+ case VKD3D_SHADER_SV_COVERAGE: return "COVERAGE"; -+ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "DEPTHGE"; -+ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "DEPTHLE"; -+ case VKD3D_SHADER_SV_STENCIL_REF: return "STENCILREF"; -+ default: return "??"; -+ } -+} -+ -+static const char *get_component_type_name(enum vkd3d_shader_component_type type) -+{ -+ switch (type) -+ { -+ case VKD3D_SHADER_COMPONENT_VOID: return "void"; -+ case VKD3D_SHADER_COMPONENT_UINT: return "uint"; -+ case VKD3D_SHADER_COMPONENT_INT: return "int"; -+ case VKD3D_SHADER_COMPONENT_FLOAT: return "float"; -+ case VKD3D_SHADER_COMPONENT_BOOL: return "bool"; -+ case VKD3D_SHADER_COMPONENT_DOUBLE: return "double"; -+ case VKD3D_SHADER_COMPONENT_UINT64: return "uint64"; -+ default: return "??"; -+ } -+} -+ -+static const char *get_minimum_precision_name(enum vkd3d_shader_minimum_precision prec) -+{ -+ switch (prec) -+ { -+ case VKD3D_SHADER_MINIMUM_PRECISION_NONE: return "NONE"; -+ case VKD3D_SHADER_MINIMUM_PRECISION_FLOAT_16: return "FLOAT_16"; -+ case VKD3D_SHADER_MINIMUM_PRECISION_FIXED_8_2: return "FIXED_8_2"; -+ case VKD3D_SHADER_MINIMUM_PRECISION_INT_16: return "INT_16"; -+ case VKD3D_SHADER_MINIMUM_PRECISION_UINT_16: return "UINT_16"; -+ default: return "??"; -+ } -+} -+ -+static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic semantic) -+{ -+ switch (semantic) -+ { -+ case VKD3D_SHADER_SV_DEPTH: return "oDepth"; -+ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "oDepthGE"; -+ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "oDepthLE"; -+ /* SV_Coverage has name 
vCoverage when used as an input, -+ * but it doens't appear in the signature in that case. */ -+ case VKD3D_SHADER_SV_COVERAGE: return "oMask"; -+ case VKD3D_SHADER_SV_STENCIL_REF: return "oStencilRef"; -+ default: return "??"; -+ } -+} -+ -+static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *name, const char *register_name, const struct shader_signature *signature) -+{ -+ struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ unsigned int i; -+ -+ if (signature->element_count == 0) -+ return VKD3D_OK; -+ -+ vkd3d_string_buffer_printf(buffer, "%s%s%s\n", -+ compiler->colours.opcode, name, compiler->colours.reset); -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ struct signature_element *element = &signature->elements[i]; -+ -+ vkd3d_string_buffer_printf(buffer, "%s.param%s %s", compiler->colours.opcode, -+ compiler->colours.reset, element->semantic_name); -+ -+ if (element->semantic_index != 0) -+ vkd3d_string_buffer_printf(buffer, "%u", element->semantic_index); -+ -+ if (element->register_index != -1) -+ { -+ shader_print_write_mask(compiler, "", element->mask, ""); -+ vkd3d_string_buffer_printf(buffer, ", %s%s%d%s", compiler->colours.reg, -+ register_name, element->register_index, compiler->colours.reset); -+ shader_print_write_mask(compiler, "", element->used_mask, ""); -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(buffer, ", %s%s%s", compiler->colours.reg, -+ get_semantic_register_name(element->sysval_semantic), compiler->colours.reset); -+ } -+ -+ if (!element->component_type && !element->sysval_semantic -+ && !element->min_precision && !element->stream_index) -+ goto done; -+ -+ vkd3d_string_buffer_printf(buffer, ", %s", -+ get_component_type_name(element->component_type)); -+ -+ if (!element->sysval_semantic && !element->min_precision && !element->stream_index) -+ goto done; -+ -+ vkd3d_string_buffer_printf(buffer, ", %s", -+ get_sysval_semantic_name(element->sysval_semantic)); -+ -+ if 
(!element->min_precision && !element->stream_index) -+ goto done; -+ -+ vkd3d_string_buffer_printf(buffer, ", %s", -+ get_minimum_precision_name(element->min_precision)); -+ -+ if (!element->stream_index) -+ goto done; -+ -+ vkd3d_string_buffer_printf(buffer, ", m%u", -+ element->stream_index); -+ -+ done: -+ vkd3d_string_buffer_printf(buffer, "\n"); -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler, -+ const struct vsir_program *program) -+{ -+ enum vkd3d_result ret; -+ -+ if ((ret = dump_signature(compiler, ".input", -+ program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v", -+ &program->input_signature)) < 0) -+ return ret; -+ -+ if ((ret = dump_signature(compiler, ".output", "o", -+ &program->output_signature)) < 0) -+ return ret; -+ -+ if ((ret = dump_signature(compiler, ".patch_constant", -+ program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o", -+ &program->patch_constant_signature)) < 0) -+ return ret; -+ -+ vkd3d_string_buffer_printf(&compiler->buffer, "%s.text%s\n", -+ compiler->colours.opcode, compiler->colours.reset); -+ -+ return VKD3D_OK; -+} -+ -+enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, enum vsir_asm_dialect dialect) -+ struct vkd3d_shader_code *out, enum vsir_asm_flags flags) - { - const struct vkd3d_shader_version *shader_version = &program->shader_version; - enum vkd3d_shader_compile_option_formatting_flags formatting; - struct vkd3d_d3d_asm_compiler compiler = - { -- .dialect = dialect, -+ .flags = flags, - }; - enum vkd3d_result result = VKD3D_OK; - struct vkd3d_string_buffer *buffer; - unsigned int indent, i, j; - const char *indent_str; -- void *code; - - static const struct vkd3d_d3d_asm_colours no_colours = - { -@@ -2064,6 +2250,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - .swizzle = 
"", - .version = "", - .write_mask = "", -+ .label = "", - }; - static const struct vkd3d_d3d_asm_colours colours = - { -@@ -2076,6 +2263,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - .swizzle = "\x1b[93m", - .version = "\x1b[36m", - .write_mask = "\x1b[93m", -+ .label = "\x1b[91m", - }; - - formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT -@@ -2109,6 +2297,17 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - shader_get_type_prefix(shader_version->type), shader_version->major, - shader_version->minor, compiler.colours.reset); - -+ /* The signatures we emit only make sense for DXBC shaders. D3DBC -+ * doesn't even have an explicit concept of signature. */ -+ if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4) -+ { -+ if ((result = dump_signatures(&compiler, program)) < 0) -+ { -+ vkd3d_string_buffer_cleanup(buffer); -+ return result; -+ } -+ } -+ - indent = 0; - for (i = 0; i < program->instructions.count; ++i) - { -@@ -2124,6 +2323,10 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - --indent; - break; - -+ case VKD3DSIH_LABEL: -+ indent = 0; -+ break; -+ - default: - break; - } -@@ -2142,6 +2345,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - case VKD3DSIH_IFC: - case VKD3DSIH_LOOP: - case VKD3DSIH_SWITCH: -+ case VKD3DSIH_LABEL: - ++indent; - break; - -@@ -2150,18 +2354,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - } - } - -- if ((code = vkd3d_malloc(buffer->content_size))) -- { -- memcpy(code, buffer->buffer, buffer->content_size); -- out->size = buffer->content_size; -- out->code = code; -- } -- else -- { -- result = VKD3D_ERROR_OUT_OF_MEMORY; -- } -- -- vkd3d_string_buffer_cleanup(buffer); -+ vkd3d_shader_code_from_string_buffer(out, buffer); - - return result; - } -@@ -2171,7 +2364,7 @@ void 
vkd3d_shader_trace(const struct vsir_program *program) - const char *p, *q, *end; - struct vkd3d_shader_code code; - -- if (vkd3d_dxbc_binary_to_text(program, NULL, &code, VSIR_ASM_VSIR) != VKD3D_OK) -+ if (d3d_asm_compile(program, NULL, &code, VSIR_ASM_FLAG_DUMP_TYPES) != VKD3D_OK) - return; - - end = (const char *)code.code + code.size; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 27f5c810436..3b935b07d61 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -541,9 +541,9 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp - struct signature_element *element; - - if (output) -- signature = &sm1->p.shader_desc.output_signature; -+ signature = &sm1->p.program.output_signature; - else -- signature = &sm1->p.shader_desc.input_signature; -+ signature = &sm1->p.program.input_signature; - - if ((element = find_signature_element(signature, name, index))) - { -@@ -581,9 +581,9 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, - struct signature_element *element; - - if (output) -- signature = &sm1->p.shader_desc.output_signature; -+ signature = &sm1->p.program.output_signature; - else -- signature = &sm1->p.shader_desc.input_signature; -+ signature = &sm1->p.program.input_signature; - - if (!(element = find_signature_element_by_register_index(signature, register_index))) - { -@@ -886,7 +886,6 @@ static void shader_sm1_destroy(struct vkd3d_shader_parser *parser) - struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); - - vsir_program_cleanup(&parser->program); -- free_shader_desc(&sm1->p.shader_desc); - vkd3d_free(sm1); - } - -@@ -1237,7 +1236,6 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, - const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; - const uint32_t *code = compile_info->source.code; - size_t code_size = 
compile_info->source.size; -- struct vkd3d_shader_desc *shader_desc; - struct vkd3d_shader_version version; - uint16_t shader_type; - size_t token_count; -@@ -1290,9 +1288,6 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, - if (!vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name, &version, &shader_sm1_parser_ops, - code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- shader_desc = &sm1->p.shader_desc; -- shader_desc->byte_code = code; -- shader_desc->byte_code_size = code_size; - sm1->ptr = sm1->start; - - return VKD3D_OK; -@@ -1363,7 +1358,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi - sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); - - if (!sm1->p.failed) -- ret = vsir_validate(&sm1->p); -+ ret = vkd3d_shader_parser_validate(&sm1->p); - - if (sm1->p.failed && ret >= 0) - ret = VKD3D_ERROR_INVALID_SHADER; -@@ -1499,12 +1494,12 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns - return D3DPS_VERSION(major, minor); - } - --static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) -+D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - { - switch (type->class) - { - case HLSL_CLASS_ARRAY: -- return sm1_class(type->e.array.type); -+ return hlsl_sm1_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) -@@ -1525,7 +1520,7 @@ static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) - } - } - --static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) -+D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - { - switch (type->base_type) - { -@@ -1620,7 +1615,7 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_typ - } - } - -- type->bytecode_offset = put_u32(buffer, 
vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); -+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); -@@ -1977,16 +1972,13 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - { - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- /* Integers are internally represented as floats, so no change is necessary.*/ -+ case HLSL_TYPE_BOOL: -+ /* Integrals are internally represented as floats, so no change is necessary.*/ - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - -- case HLSL_TYPE_BOOL: -- hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to float."); -- break; -- - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float."); - break; -@@ -2002,7 +1994,10 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- /* A compilation pass applies a FLOOR operation to casts to int, so no change is necessary. */ -+ /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not -+ * reach this case unless we are missing something. 
*/ -+ hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer."); -+ break; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -@@ -2067,6 +2062,9 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - D3DDECLUSAGE usage; - bool ret; - -+ if ((!output && !var->last_read) || (output && !var->first_write)) -+ return; -+ - if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) - { - usage = 0; -@@ -2242,6 +2240,12 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - - assert(instr->reg.allocated); - -+ if (expr->op == HLSL_OP1_REINTERPRET) -+ { -+ write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ return; -+ } -+ - if (expr->op == HLSL_OP1_CAST) - { - write_sm1_cast(ctx, buffer, instr); -@@ -2329,7 +2333,15 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - break; - -+ case HLSL_OP2_SLT: -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); -+ write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); -+ break; -+ - case HLSL_OP3_CMP: -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) -+ hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); - write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - -@@ -2488,7 +2500,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - - if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) - { -- FIXME("Matrix writemasks need to be lowered.\n"); -+ hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks.\n"); - return; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index 37ebc73c099..8a1012d909b 100644 ---- 
a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -150,7 +150,7 @@ static const char *shader_get_string(const char *data, size_t data_size, size_t - } - - static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, -- const char *source_name, struct vkd3d_shader_dxbc_desc *desc) -+ const char *source_name, uint32_t flags, struct vkd3d_shader_dxbc_desc *desc) - { - const struct vkd3d_shader_location location = {.source_name = source_name}; - struct vkd3d_shader_dxbc_section_desc *sections, *section; -@@ -186,17 +186,20 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ - checksum[1] = read_u32(&ptr); - checksum[2] = read_u32(&ptr); - checksum[3] = read_u32(&ptr); -- vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); -- if (memcmp(checksum, calculated_checksum, sizeof(checksum))) -- { -- WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " -- "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", -- checksum[0], checksum[1], checksum[2], checksum[3], -- calculated_checksum[0], calculated_checksum[1], -- calculated_checksum[2], calculated_checksum[3]); -- vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, -- "Invalid DXBC checksum."); -- return VKD3D_ERROR_INVALID_ARGUMENT; -+ if (!(flags & VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM)) -+ { -+ vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); -+ if (memcmp(checksum, calculated_checksum, sizeof(checksum))) -+ { -+ WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " -+ "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", -+ checksum[0], checksum[1], checksum[2], checksum[3], -+ calculated_checksum[0], calculated_checksum[1], -+ calculated_checksum[2], calculated_checksum[3]); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, -+ "Invalid DXBC checksum."); -+ 
return VKD3D_ERROR_INVALID_ARGUMENT; -+ } - } - - version = read_u32(&ptr); -@@ -287,7 +290,7 @@ static int for_each_dxbc_section(const struct vkd3d_shader_code *dxbc, - unsigned int i; - int ret; - -- if ((ret = parse_dxbc(dxbc, message_context, source_name, &desc)) < 0) -+ if ((ret = parse_dxbc(dxbc, message_context, source_name, 0, &desc)) < 0) - return ret; - - for (i = 0; i < desc.section_count; ++i) -@@ -313,7 +316,7 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, - *messages = NULL; - vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); - -- ret = parse_dxbc(dxbc, &message_context, NULL, desc); -+ ret = parse_dxbc(dxbc, &message_context, NULL, flags, desc); - - vkd3d_shader_message_context_trace_messages(&message_context); - if (!vkd3d_shader_message_context_copy_messages(&message_context, messages) && ret >= 0) -@@ -485,7 +488,7 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, - struct vkd3d_shader_message_context *message_context, void *context) - { -- struct vkd3d_shader_desc *desc = context; -+ struct dxbc_shader_desc *desc = context; - int ret; - - switch (section->tag) -@@ -550,7 +553,7 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, - return VKD3D_OK; - } - --void free_shader_desc(struct vkd3d_shader_desc *desc) -+void free_dxbc_shader_desc(struct dxbc_shader_desc *desc) - { - shader_signature_cleanup(&desc->input_signature); - shader_signature_cleanup(&desc->output_signature); -@@ -558,7 +561,7 @@ void free_shader_desc(struct vkd3d_shader_desc *desc) - } - - int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, -- struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) -+ struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc) - { - int ret; - -@@ 
-569,7 +572,7 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - if (ret < 0) - { - WARN("Failed to parse shader, vkd3d result %d.\n", ret); -- free_shader_desc(desc); -+ free_dxbc_shader_desc(desc); - } - - return ret; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 26a8a5c1cc3..7f9a74fa737 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -342,6 +342,8 @@ enum dx_intrinsic_opcode - { - DX_LOAD_INPUT = 4, - DX_STORE_OUTPUT = 5, -+ DX_FABS = 6, -+ DX_SATURATE = 7, - DX_ISNAN = 8, - DX_ISINF = 9, - DX_ISFINITE = 10, -@@ -374,6 +376,10 @@ enum dx_intrinsic_opcode - DX_IMIN = 38, - DX_UMAX = 39, - DX_UMIN = 40, -+ DX_FMAD = 46, -+ DX_FMA = 47, -+ DX_IMAD = 48, -+ DX_UMAD = 49, - DX_IBFE = 51, - DX_UBFE = 52, - DX_CREATE_HANDLE = 57, -@@ -388,9 +394,13 @@ enum dx_intrinsic_opcode - DX_TEXTURE_STORE = 67, - DX_BUFFER_LOAD = 68, - DX_BUFFER_STORE = 69, -+ DX_BUFFER_UPDATE_COUNTER = 70, - DX_GET_DIMENSIONS = 72, -+ DX_TEXTURE_GATHER = 73, -+ DX_TEXTURE_GATHER_CMP = 74, - DX_ATOMIC_BINOP = 78, - DX_ATOMIC_CMP_XCHG = 79, -+ DX_BARRIER = 80, - DX_DERIV_COARSEX = 83, - DX_DERIV_COARSEY = 84, - DX_DERIV_FINEX = 85, -@@ -463,6 +473,14 @@ enum dxil_atomic_binop_code - ATOMIC_BINOP_INVALID, - }; - -+enum dxil_sync_flags -+{ -+ SYNC_THREAD_GROUP = 0x1, -+ SYNC_GLOBAL_UAV = 0x2, -+ SYNC_THREAD_GROUP_UAV = 0x4, -+ SYNC_GROUP_SHARED_MEMORY = 0x8, -+}; -+ - struct sm6_pointer_info - { - const struct sm6_type *type; -@@ -541,6 +559,7 @@ struct sm6_value - { - const struct sm6_type *type; - enum sm6_value_type value_type; -+ unsigned int structure_stride; - bool is_undefined; - union - { -@@ -753,6 +772,7 @@ struct sm6_parser - - unsigned int indexable_temp_count; - unsigned int icb_count; -+ unsigned int tgsm_count; - - struct sm6_value *values; - size_t value_count; -@@ -2110,6 +2130,15 @@ static inline bool sm6_value_is_undef(const struct sm6_value *value) - return 
sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; - } - -+static bool sm6_value_vector_is_constant_or_undef(const struct sm6_value **values, unsigned int count) -+{ -+ unsigned int i; -+ for (i = 0; i < count; ++i) -+ if (!sm6_value_is_constant(values[i]) && !sm6_value_is_undef(values[i])) -+ return false; -+ return true; -+} -+ - static bool sm6_value_is_icb(const struct sm6_value *value) - { - return value->value_type == VALUE_TYPE_ICB; -@@ -2199,6 +2228,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type - return VKD3D_DATA_BOOL; - case 8: - return VKD3D_DATA_UINT8; -+ case 16: -+ return VKD3D_DATA_UINT16; - case 32: - return VKD3D_DATA_UINT; - case 64: -@@ -2212,6 +2243,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type - { - switch (type->u.width) - { -+ case 16: -+ return VKD3D_DATA_HALF; - case 32: - return VKD3D_DATA_FLOAT; - case 64: -@@ -2252,6 +2285,12 @@ static void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const st - register_init_ssa_vector(reg, sm6_type_get_scalar_type(type, 0), 1, value, sm6); - } - -+static void register_make_constant_uint(struct vkd3d_shader_register *reg, unsigned int value) -+{ -+ vsir_register_init(reg, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); -+ reg->u.immconst_u32[0] = value; -+} -+ - static void dst_param_init(struct vkd3d_shader_dst_param *param) - { - param->write_mask = VKD3DSP_WRITEMASK_0; -@@ -2315,6 +2354,12 @@ static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param, - param->reg = *reg; - } - -+static void src_param_make_constant_uint(struct vkd3d_shader_src_param *param, unsigned int value) -+{ -+ src_param_init(param); -+ register_make_constant_uint(¶m->reg, value); -+} -+ - static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address, - struct sm6_parser *sm6) - { -@@ -2336,14 +2381,18 @@ static void register_index_address_init(struct 
vkd3d_shader_register_index *idx, - } - } - --static void instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) -+static bool instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) - { -- struct vkd3d_shader_dst_param *param = instruction_dst_params_alloc(ins, 1, sm6); - struct sm6_value *dst = sm6_parser_get_current_value(sm6); -+ struct vkd3d_shader_dst_param *param; -+ -+ if (!(param = instruction_dst_params_alloc(ins, 1, sm6))) -+ return false; - - dst_param_init_ssa_scalar(param, dst->type, dst, sm6); - param->write_mask = VKD3DSP_WRITEMASK_0; - dst->u.reg = param->reg; -+ return true; - } - - static void instruction_dst_param_init_ssa_vector(struct vkd3d_shader_instruction *ins, -@@ -2876,7 +2925,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - } - - if (type->u.width == 16) -- FIXME("Half float type is not supported yet.\n"); -+ dst->u.reg.u.immconst_u32[0] = record->operands[0]; - else if (type->u.width == 32) - dst->u.reg.u.immconst_f32[0] = bitcast_uint64_to_float(record->operands[0]); - else if (type->u.width == 64) -@@ -2994,6 +3043,58 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru - register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx); - } - -+static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6_type *elem_type, -+ unsigned int alignment, unsigned int init, struct sm6_value *dst) -+{ -+ enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); -+ struct vkd3d_shader_instruction *ins; -+ unsigned int byte_count; -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_RAW); -+ dst_param_init(&ins->declaration.tgsm_raw.reg); -+ register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); -+ dst->u.reg = 
ins->declaration.tgsm_raw.reg.reg; -+ dst->structure_stride = 0; -+ ins->declaration.tgsm_raw.alignment = alignment; -+ byte_count = elem_type->u.width / 8u; -+ if (byte_count != 4) -+ { -+ FIXME("Unsupported byte count %u.\n", byte_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Raw TGSM byte count %u is not supported.", byte_count); -+ } -+ ins->declaration.tgsm_raw.byte_count = byte_count; -+ /* The initialiser value index will be resolved later when forward references can be handled. */ -+ ins->flags = init; -+} -+ -+static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const struct sm6_type *elem_type, -+ unsigned int count, unsigned int alignment, unsigned int init, struct sm6_value *dst) -+{ -+ enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); -+ struct vkd3d_shader_instruction *ins; -+ unsigned int structure_stride; -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_STRUCTURED); -+ dst_param_init(&ins->declaration.tgsm_structured.reg); -+ register_init_with_id(&ins->declaration.tgsm_structured.reg.reg, VKD3DSPR_GROUPSHAREDMEM, -+ data_type, sm6->tgsm_count++); -+ dst->u.reg = ins->declaration.tgsm_structured.reg.reg; -+ structure_stride = elem_type->u.width / 8u; -+ if (structure_stride != 4) -+ { -+ FIXME("Unsupported structure stride %u.\n", structure_stride); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Structured TGSM byte stride %u is not supported.", structure_stride); -+ } -+ dst->structure_stride = structure_stride; -+ ins->declaration.tgsm_structured.alignment = alignment; -+ ins->declaration.tgsm_structured.byte_stride = structure_stride; -+ ins->declaration.tgsm_structured.structure_count = count; -+ /* The initialiser value index will be resolved later when forward references can be handled. 
*/ -+ ins->flags = init; -+} -+ - static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record) - { - const struct sm6_type *type, *scalar_type; -@@ -3119,10 +3220,17 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ - } - else if (address_space == ADDRESS_SPACE_GROUPSHARED) - { -- FIXME("Unsupported TGSM.\n"); -- vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -- "TGSM global variables are not supported."); -- return false; -+ if (!sm6_type_is_numeric(scalar_type)) -+ { -+ WARN("Unsupported type class %u.\n", scalar_type->class); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "TGSM variables of type class %u are not supported.", scalar_type->class); -+ return false; -+ } -+ if (count == 1) -+ sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); -+ else -+ sm6_parser_declare_tgsm_structured(sm6, scalar_type, count, alignment, init, dst); - } - else - { -@@ -3158,6 +3266,38 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init - return NULL; - } - -+static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm6) -+{ -+ const struct sm6_value *value; -+ -+ if (!index) -+ return false; -+ -+ --index; -+ if (!(value = sm6_parser_get_value_safe(sm6, index)) -+ || (!sm6_value_is_icb(value) && !sm6_value_is_constant(value) && !sm6_value_is_undef(value))) -+ { -+ WARN("Invalid initialiser index %zu.\n", index); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "TGSM initialiser value index %zu is invalid.", index); -+ return false; -+ } -+ else if ((sm6_value_is_icb(value) && value->u.icb->is_null) || sm6_value_is_constant_zero(value)) -+ { -+ return true; -+ } -+ else if (sm6_value_is_undef(value)) -+ { -+ /* In VSIR, initialisation with undefined values of objects is implied, not explicit. 
*/ -+ return false; -+ } -+ -+ FIXME("Non-zero initialisers are not supported.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Non-zero TGSM initialisers are not supported."); -+ return false; -+} -+ - static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) - { - size_t i, count, base_value_idx = sm6->value_count; -@@ -3231,6 +3371,16 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) - { - ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); - } -+ else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) -+ { -+ ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); -+ ins->flags = 0; -+ } -+ else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) -+ { -+ ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); -+ ins->flags = 0; -+ } - } - for (i = base_value_idx; i < sm6->value_count; ++i) - { -@@ -3974,6 +4124,64 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr - dst->u.reg = dst_params[0].reg; - } - -+static void sm6_parser_emit_dx_barrier(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ enum dxil_sync_flags flags; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SYNC); -+ flags = sm6_value_get_constant_uint(operands[0]); -+ ins->flags = flags & (SYNC_THREAD_GROUP | SYNC_THREAD_GROUP_UAV); -+ if (flags & SYNC_GLOBAL_UAV) -+ ins->flags |= VKD3DSSF_GLOBAL_UAV; -+ if (flags & SYNC_GROUP_SHARED_MEMORY) -+ ins->flags |= VKD3DSSF_GROUP_SHARED_MEMORY; -+ if (flags &= ~(SYNC_THREAD_GROUP | SYNC_GLOBAL_UAV | SYNC_THREAD_GROUP_UAV | SYNC_GROUP_SHARED_MEMORY)) -+ { -+ FIXME("Unhandled flags %#x.\n", flags); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ 
"Barrier flags %#x are unhandled.", flags); -+ } -+} -+ -+static void sm6_parser_emit_dx_buffer_update_counter(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_params; -+ const struct sm6_value *resource; -+ unsigned int i; -+ int8_t inc; -+ -+ resource = operands[0]; -+ if (!sm6_value_validate_is_handle(resource, sm6)) -+ return; -+ -+ if (!sm6_value_is_constant(operands[1])) -+ { -+ FIXME("Unsupported dynamic update operand.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "A dynamic update value for a UAV counter operation is not supported."); -+ return; -+ } -+ i = sm6_value_get_constant_uint(operands[1]); -+ if (i != 1 && i != 255) -+ { -+ WARN("Unexpected update value %#x.\n", i); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Update value %#x for a UAV counter operation is not supported.", i); -+ } -+ inc = i; -+ -+ vsir_instruction_init(ins, &sm6->p.location, (inc < 0) ? 
VKD3DSIH_IMM_ATOMIC_CONSUME : VKD3DSIH_IMM_ATOMIC_ALLOC); -+ if (!(src_params = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_vector_from_reg(&src_params[0], &resource->u.handle.reg); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -4065,6 +4273,53 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int - ins->handler_idx = VKD3DSIH_NOP; - } - -+static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ src_param->modifiers = VKD3DSPSM_ABS; -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static enum vkd3d_shader_opcode sm6_dx_map_ma_op(enum dx_intrinsic_opcode op, const struct sm6_type *type) -+{ -+ switch (op) -+ { -+ case DX_FMA: -+ return VKD3DSIH_DFMA; -+ case DX_FMAD: -+ return VKD3DSIH_MAD; -+ case DX_IMAD: -+ case DX_UMAD: -+ return VKD3DSIH_IMAD; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void sm6_parser_emit_dx_ma(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_params; -+ unsigned int i; -+ -+ vsir_instruction_init(ins, &sm6->p.location, sm6_dx_map_ma_op(op, operands[0]->type)); -+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -+ return; -+ for (i = 0; i < 3; ++i) -+ 
src_param_init_from_value(&src_params[i], operands[i]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -4182,7 +4437,7 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin - - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); - -- signature = &sm6->p.shader_desc.input_signature; -+ signature = &sm6->p.program.input_signature; - if (row_index >= signature->element_count) - { - WARN("Invalid row index %u.\n", row_index); -@@ -4521,6 +4776,21 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ - instruction_dst_param_init_ssa_vector(ins, component_count, sm6); - } - -+static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ -+ if (instruction_dst_param_init_ssa_scalar(ins, sm6)) -+ ins->dst->modifiers = VKD3DSPDM_SATURATE; -+} -+ - static void sm6_parser_emit_dx_sincos(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -4572,7 +4842,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr - row_index = sm6_value_get_constant_uint(operands[0]); - column_index = sm6_value_get_constant_uint(operands[2]); - -- signature = &sm6->p.shader_desc.output_signature; -+ signature = &sm6->p.program.output_signature; - if (row_index >= signature->element_count) - { - WARN("Invalid row index 
%u.\n", row_index); -@@ -4612,6 +4882,68 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr - src_param_init_from_value(src_param, value); - } - -+static void sm6_parser_emit_dx_texture_gather(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_register coord, offset; -+ const struct sm6_value *resource, *sampler; -+ struct vkd3d_shader_src_param *src_params; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int swizzle; -+ bool extended_offset; -+ -+ resource = operands[0]; -+ sampler = operands[1]; -+ if (!sm6_value_validate_is_texture_handle(resource, op, sm6) -+ || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) -+ { -+ return; -+ } -+ -+ if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], VKD3D_VEC4_SIZE, NULL, state, &coord)) -+ return; -+ -+ if ((extended_offset = !sm6_value_vector_is_constant_or_undef(&operands[6], 2)) -+ && !sm6_parser_emit_coordinate_construct(sm6, &operands[6], 2, NULL, state, &offset)) -+ { -+ return; -+ } -+ -+ ins = state->ins; -+ if (op == DX_TEXTURE_GATHER) -+ { -+ instruction_init_with_resource(ins, extended_offset ? VKD3DSIH_GATHER4_PO : VKD3DSIH_GATHER4, resource, sm6); -+ if (!(src_params = instruction_src_params_alloc(ins, 3 + extended_offset, sm6))) -+ return; -+ } -+ else -+ { -+ instruction_init_with_resource(ins, extended_offset ? 
VKD3DSIH_GATHER4_PO_C : VKD3DSIH_GATHER4_C, resource, sm6); -+ if (!(src_params = instruction_src_params_alloc(ins, 4 + extended_offset, sm6))) -+ return; -+ src_param_init_from_value(&src_params[3 + extended_offset], operands[9]); -+ } -+ -+ src_param_init_vector_from_reg(&src_params[0], &coord); -+ if (extended_offset) -+ src_param_init_vector_from_reg(&src_params[1], &offset); -+ else -+ instruction_set_texel_offset(ins, &operands[6], sm6); -+ src_param_init_vector_from_reg(&src_params[1 + extended_offset], &resource->u.handle.reg); -+ src_param_init_vector_from_reg(&src_params[2 + extended_offset], &sampler->u.handle.reg); -+ /* Swizzle stored in the sampler parameter is the scalar component index to be gathered. */ -+ swizzle = sm6_value_get_constant_uint(operands[8]); -+ if (swizzle >= VKD3D_VEC4_SIZE) -+ { -+ WARN("Invalid swizzle %#x.\n", swizzle); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Swizzle %#x for a texture gather operation is invalid.", swizzle); -+ } -+ src_params[2 + extended_offset].swizzle = swizzle; -+ -+ instruction_dst_param_init_ssa_vector(ins, VKD3D_VEC4_SIZE, sm6); -+} -+ - static void sm6_parser_emit_dx_texture_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -4741,9 +5073,11 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_ATAN ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_ATOMIC_BINOP ] = {"o", "HciiiR", sm6_parser_emit_dx_atomic_binop}, - [DX_ATOMIC_CMP_XCHG ] = {"o", "HiiiRR", sm6_parser_emit_dx_atomic_binop}, -+ [DX_BARRIER ] = {"v", "c", sm6_parser_emit_dx_barrier}, - [DX_BFREV ] = {"m", "R", sm6_parser_emit_dx_unary}, - [DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load}, - [DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store}, -+ [DX_BUFFER_UPDATE_COUNTER ] = {"i", "H8", sm6_parser_emit_dx_buffer_update_counter}, - [DX_CBUFFER_LOAD_LEGACY ] = 
{"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, - [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, - [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, -@@ -4753,9 +5087,12 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_DERIV_FINEX ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DERIV_FINEY ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, -+ [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, - [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, - [DX_FIRST_BIT_LO ] = {"i", "m", sm6_parser_emit_dx_unary}, - [DX_FIRST_BIT_SHI ] = {"i", "m", sm6_parser_emit_dx_unary}, -+ [DX_FMA ] = {"g", "RRR", sm6_parser_emit_dx_ma}, -+ [DX_FMAD ] = {"g", "RRR", sm6_parser_emit_dx_ma}, - [DX_FMAX ] = {"g", "RR", sm6_parser_emit_dx_binary}, - [DX_FMIN ] = {"g", "RR", sm6_parser_emit_dx_binary}, - [DX_FRC ] = {"g", "R", sm6_parser_emit_dx_unary}, -@@ -4764,6 +5101,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_HCOS ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_HSIN ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_HTAN ] = {"g", "R", sm6_parser_emit_dx_unary}, -+ [DX_IMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, - [DX_IMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, - [DX_IMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, - [DX_ISFINITE ] = {"1", "g", sm6_parser_emit_dx_unary}, -@@ -4786,14 +5124,18 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_SAMPLE_C_LZ ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_GRAD ] = {"o", "HHffffiiifffffff", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_LOD ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, -+ [DX_SATURATE ] = {"g", "R", sm6_parser_emit_dx_saturate}, - [DX_SIN ] = {"g", "R", sm6_parser_emit_dx_sincos}, - [DX_SPLIT_DOUBLE ] = {"S", "d", sm6_parser_emit_dx_split_double}, - [DX_SQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_STORE_OUTPUT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, - [DX_TAN ] = 
{"g", "R", sm6_parser_emit_dx_unary}, -+ [DX_TEXTURE_GATHER ] = {"o", "HHffffiic", sm6_parser_emit_dx_texture_gather}, -+ [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", sm6_parser_emit_dx_texture_gather}, - [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, - [DX_TEXTURE_STORE ] = {"v", "Hiiiooooc", sm6_parser_emit_dx_texture_store}, - [DX_UBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, -+ [DX_UMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, - [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, - [DX_UMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, - }; -@@ -5055,7 +5397,10 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ - break; - case CAST_ZEXT: - case CAST_SEXT: -- /* nop or min precision. TODO: native 16-bit */ -+ /* nop or min precision. TODO: native 16-bit. -+ * Extension instructions could be emitted for min precision, but in Windows -+ * the AMD RX 580 simply drops such instructions, which makes sense as no -+ * assumptions should be made about any behaviour which depends on bit width. 
*/ - if (to->u.width == from->u.width || (to->u.width == 32 && from->u.width == 16)) - { - op = VKD3DSIH_NOP; -@@ -5187,8 +5532,8 @@ static const struct sm6_cmp_info *sm6_map_cmp2_op(uint64_t code) - [FCMP_OLT] = {VKD3DSIH_LTO}, - [FCMP_OLE] = {VKD3DSIH_GEO, true}, - [FCMP_ONE] = {VKD3DSIH_NEO}, -- [FCMP_ORD] = {VKD3DSIH_INVALID}, -- [FCMP_UNO] = {VKD3DSIH_INVALID}, -+ [FCMP_ORD] = {VKD3DSIH_ORD}, -+ [FCMP_UNO] = {VKD3DSIH_UNO}, - [FCMP_UEQ] = {VKD3DSIH_EQU}, - [FCMP_UGT] = {VKD3DSIH_LTU, true}, - [FCMP_UGE] = {VKD3DSIH_GEU}, -@@ -5459,6 +5804,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record - register_index_address_init(®->idx[1], elem_value, sm6); - reg->idx[1].is_in_bounds = is_in_bounds; - reg->idx_count = 2; -+ dst->structure_stride = src->structure_stride; - - ins->handler_idx = VKD3DSIH_NOP; - } -@@ -5467,8 +5813,8 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor - struct vkd3d_shader_instruction *ins, struct sm6_value *dst) - { - const struct sm6_type *elem_type = NULL, *pointee_type; -- struct vkd3d_shader_src_param *src_param; -- unsigned int alignment, i = 0; -+ unsigned int alignment, operand_count, i = 0; -+ struct vkd3d_shader_src_param *src_params; - const struct sm6_value *ptr; - uint64_t alignment_code; - -@@ -5505,12 +5851,34 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor - if (record->operands[i]) - WARN("Ignoring volatile modifier.\n"); - -- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (ptr->structure_stride) -+ { -+ assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); - -- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -- return; -- src_param_init_from_value(&src_param[0], ptr); -- src_param->reg.alignment = alignment; -+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -+ return; -+ if (ptr->u.reg.idx[1].rel_addr) -+ 
src_params[0] = *ptr->u.reg.idx[1].rel_addr; -+ else -+ src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); -+ /* Struct offset is always zero as there is no struct, just an array. */ -+ src_param_make_constant_uint(&src_params[1], 0); -+ src_param_init_from_value(&src_params[2], ptr); -+ src_params[2].reg.alignment = alignment; -+ } -+ else -+ { -+ operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV); -+ -+ if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) -+ return; -+ if (operand_count > 1) -+ src_param_make_constant_uint(&src_params[0], 0); -+ src_param_init_from_value(&src_params[operand_count - 1], ptr); -+ src_params[operand_count - 1].reg.alignment = alignment; -+ } - - instruction_dst_param_init_ssa_scalar(ins, sm6); - } -@@ -5628,11 +5996,11 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record - static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, - struct vkd3d_shader_instruction *ins, struct sm6_value *dst) - { -- struct vkd3d_shader_src_param *src_param; -+ unsigned int i = 0, alignment, operand_count; -+ struct vkd3d_shader_src_param *src_params; - struct vkd3d_shader_dst_param *dst_param; - const struct sm6_type *pointee_type; - const struct sm6_value *ptr, *src; -- unsigned int i = 0, alignment; - uint64_t alignment_code; - - if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) -@@ -5665,16 +6033,40 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco - if (record->operands[i]) - WARN("Ignoring volatile modifier.\n"); - -- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (ptr->structure_stride) -+ { -+ assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED); - -- if (!(src_param = 
instruction_src_params_alloc(ins, 1, sm6))) -- return; -- src_param_init_from_value(&src_param[0], src); -+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -+ return; -+ if (ptr->u.reg.idx[1].rel_addr) -+ src_params[0] = *ptr->u.reg.idx[1].rel_addr; -+ else -+ src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); -+ /* Struct offset is always zero as there is no struct, just an array. */ -+ src_param_make_constant_uint(&src_params[1], 0); -+ src_param_init_from_value(&src_params[2], src); -+ } -+ else -+ { -+ operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV); -+ -+ if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) -+ return; -+ if (operand_count > 1) -+ src_param_make_constant_uint(&src_params[0], 0); -+ src_param_init_from_value(&src_params[operand_count - 1], src); -+ } - - dst_param = instruction_dst_params_alloc(ins, 1, sm6); - dst_param_init(dst_param); - dst_param->reg = ptr->u.reg; - dst_param->reg.alignment = alignment; -+ /* Groupshared stores contain the address in the src params. */ -+ if (dst_param->reg.type != VKD3DSPR_IDXTEMP) -+ dst_param->reg.idx_count = 1; - } - - static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -7752,19 +8144,19 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons - } - - if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], -- &sm6->p.shader_desc.input_signature)) < 0) -+ &sm6->p.program.input_signature)) < 0) - { - return ret; - } - if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], -- &sm6->p.shader_desc.output_signature)) < 0) -+ &sm6->p.program.output_signature)) < 0) - { - return ret; - } - /* TODO: patch constant signature in operand 2. 
*/ - -- sm6_parser_init_input_signature(sm6, &sm6->p.shader_desc.input_signature); -- sm6_parser_init_output_signature(sm6, &sm6->p.shader_desc.output_signature); -+ sm6_parser_init_input_signature(sm6, &sm6->p.program.input_signature); -+ sm6_parser_init_output_signature(sm6, &sm6->p.program.output_signature); - - return VKD3D_OK; - } -@@ -8062,7 +8454,6 @@ static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) - sm6_parser_metadata_cleanup(sm6); - vkd3d_free(sm6->descriptors); - vkd3d_free(sm6->values); -- free_shader_desc(&parser->shader_desc); - vkd3d_free(sm6); - } - -@@ -8080,15 +8471,16 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 - return NULL; - } - --static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, -- const char *source_name, struct vkd3d_shader_message_context *message_context) -+static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *source_name, -+ struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) - { -- const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; -- const struct shader_signature *input_signature = &sm6->p.shader_desc.input_signature; -+ const struct shader_signature *output_signature = &sm6->p.program.output_signature; -+ const struct shader_signature *input_signature = &sm6->p.program.input_signature; -+ size_t count, length, function_count, byte_code_size = dxbc_desc->byte_code_size; - const struct vkd3d_shader_location location = {.source_name = source_name}; - uint32_t version_token, dxil_version, token_count, magic; -+ const uint32_t *byte_code = dxbc_desc->byte_code; - unsigned int chunk_offset, chunk_size; -- size_t count, length, function_count; - enum bitcode_block_abbreviation abbr; - struct vkd3d_shader_version version; - struct dxil_block *block; -@@ -8181,6 +8573,11 @@ static enum vkd3d_result sm6_parser_init(struct 
sm6_parser *sm6, const uint32_t - sm6->ptr = &sm6->start[1]; - sm6->bitpos = 2; - -+ sm6->p.program.input_signature = dxbc_desc->input_signature; -+ sm6->p.program.output_signature = dxbc_desc->output_signature; -+ sm6->p.program.patch_constant_signature = dxbc_desc->patch_constant_signature; -+ memset(dxbc_desc, 0, sizeof(*dxbc_desc)); -+ - block = &sm6->root_block; - if ((ret = dxil_block_init(block, NULL, sm6)) < 0) - { -@@ -8351,7 +8748,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) - { -- struct vkd3d_shader_desc *shader_desc; -+ struct dxbc_shader_desc dxbc_desc = {0}; - uint32_t *byte_code = NULL; - struct sm6_parser *sm6; - int ret; -@@ -8364,35 +8761,37 @@ int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compi - return VKD3D_ERROR_OUT_OF_MEMORY; - } - -- shader_desc = &sm6->p.shader_desc; -- shader_desc->is_dxil = true; -+ dxbc_desc.is_dxil = true; - if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, -- shader_desc)) < 0) -+ &dxbc_desc)) < 0) - { - WARN("Failed to extract shader, vkd3d result %d.\n", ret); - vkd3d_free(sm6); - return ret; - } - -- sm6->p.shader_desc = *shader_desc; -- shader_desc = &sm6->p.shader_desc; -- -- if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) -+ if (((uintptr_t)dxbc_desc.byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) - { - /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC - * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ -- if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) -- ERR("Failed to allocate aligned chunk. 
Unaligned access will occur.\n"); -- else -- memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); -+ if (!(byte_code = vkd3d_malloc(align(dxbc_desc.byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) -+ { -+ ERR("Failed to allocate aligned chunk.\n"); -+ free_dxbc_shader_desc(&dxbc_desc); -+ vkd3d_free(sm6); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ memcpy(byte_code, dxbc_desc.byte_code, dxbc_desc.byte_code_size); -+ dxbc_desc.byte_code = byte_code; - } - -- ret = sm6_parser_init(sm6, byte_code ? byte_code : shader_desc->byte_code, shader_desc->byte_code_size, -- compile_info->source_name, message_context); -+ ret = sm6_parser_init(sm6, compile_info->source_name, message_context, &dxbc_desc); -+ free_dxbc_shader_desc(&dxbc_desc); - vkd3d_free(byte_code); - - if (!sm6->p.failed && ret >= 0) -- ret = vsir_validate(&sm6->p); -+ ret = vkd3d_shader_parser_validate(&sm6->p); - - if (sm6->p.failed && ret >= 0) - ret = VKD3D_ERROR_INVALID_SHADER; -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index bc70d5220fd..f2be00da33a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -61,9 +61,9 @@ struct fx_write_context; - struct fx_write_context_ops - { - uint32_t (*write_string)(const char *string, struct fx_write_context *fx); -- uint32_t (*write_type)(const struct hlsl_type *type, struct fx_write_context *fx); - void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); - void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); -+ bool are_child_effects_supported; - }; - - struct fx_write_context -@@ -84,8 +84,13 @@ struct fx_write_context - uint32_t buffer_count; - uint32_t numeric_variable_count; - uint32_t object_variable_count; -+ uint32_t shared_object_count; -+ uint32_t shader_variable_count; -+ uint32_t parameter_count; - int status; - -+ bool child_effect; -+ - const struct fx_write_context_ops *ops; - }; - -@@ -97,6 +102,11 @@ static 
void set_status(struct fx_write_context *fx, int status) - fx->status = status; - } - -+static bool has_annotations(const struct hlsl_ir_var *var) -+{ -+ return var->annotations && !list_empty(&var->annotations->vars); -+} -+ - static uint32_t write_string(const char *string, struct fx_write_context *fx) - { - return fx->ops->write_string(string, fx); -@@ -107,12 +117,16 @@ static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) - fx->ops->write_pass(var, fx); - } - -+static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); -+ - static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) - { - struct type_entry *type_entry; - unsigned int elements_count; - const char *name; - -+ assert(fx->ctx->profile->major_version >= 4); -+ - if (type->class == HLSL_CLASS_ARRAY) - { - name = hlsl_get_multiarray_element_type(type)->name; -@@ -138,7 +152,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context - if (!(type_entry = hlsl_alloc(fx->ctx, sizeof(*type_entry)))) - return 0; - -- type_entry->offset = fx->ops->write_type(type, fx); -+ type_entry->offset = write_fx_4_type(type, fx); - type_entry->name = name; - type_entry->elements_count = elements_count; - -@@ -151,6 +165,7 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co - struct fx_write_context *fx) - { - unsigned int version = ctx->profile->major_version; -+ struct hlsl_block block; - - memset(fx, 0, sizeof(*fx)); - -@@ -174,12 +189,18 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co - - rb_init(&fx->strings, string_storage_compare); - list_init(&fx->types); -+ -+ fx->child_effect = fx->ops->are_child_effects_supported && ctx->child_effect; -+ -+ hlsl_block_init(&block); -+ hlsl_prepend_global_uniform_copy(fx->ctx, &block); -+ hlsl_block_cleanup(&block); -+ hlsl_calculate_buffer_offsets(fx->ctx); - } - - static int 
fx_write_context_cleanup(struct fx_write_context *fx) - { - struct type_entry *type, *next_type; -- int status = fx->status; - - rb_destroy(&fx->strings, string_storage_destroy, NULL); - -@@ -189,7 +210,7 @@ static int fx_write_context_cleanup(struct fx_write_context *fx) - vkd3d_free(type); - } - -- return status; -+ return fx->ctx->result; - } - - static bool technique_matches_version(const struct hlsl_ir_var *var, const struct fx_write_context *fx) -@@ -285,6 +306,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - [HLSL_TYPE_UINT ] = 3, - [HLSL_TYPE_BOOL ] = 4, - }; -+ struct hlsl_ctx *ctx = fx->ctx; - uint32_t value = 0; - - switch (type->class) -@@ -295,8 +317,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - value |= numeric_type_class[type->class]; - break; - default: -- FIXME("Unexpected type class %u.\n", type->class); -- set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -+ hlsl_fixme(ctx, &ctx->location, "Not implemented for type class %u.", type->class); - return 0; - } - -@@ -309,8 +330,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - value |= (numeric_base_type[type->base_type] << NUMERIC_BASE_TYPE_SHIFT); - break; - default: -- FIXME("Unexpected base type %u.\n", type->base_type); -- set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -+ hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->base_type); - return 0; - } - -@@ -322,19 +342,14 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - return value; - } - --static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) -+static const char * get_fx_4_type_name(const struct hlsl_type *type) - { -- struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -- uint32_t name_offset, offset, size, stride, numeric_desc; -- uint32_t elements_count = 0; -- const char *name; -- static const uint32_t variable_type[] = -+ static 
const char * const object_type_names[] = - { -- [HLSL_CLASS_SCALAR] = 1, -- [HLSL_CLASS_VECTOR] = 1, -- [HLSL_CLASS_MATRIX] = 1, -- [HLSL_CLASS_OBJECT] = 2, -- [HLSL_CLASS_STRUCT] = 3, -+ [HLSL_TYPE_PIXELSHADER] = "PixelShader", -+ [HLSL_TYPE_VERTEXSHADER] = "VertexShader", -+ [HLSL_TYPE_RENDERTARGETVIEW] = "RenderTargetView", -+ [HLSL_TYPE_DEPTHSTENCILVIEW] = "DepthStencilView", - }; - static const char * const texture_type_names[] = - { -@@ -360,6 +375,40 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer", - }; - -+ if (type->base_type == HLSL_TYPE_TEXTURE) -+ return texture_type_names[type->sampler_dim]; -+ -+ if (type->base_type == HLSL_TYPE_UAV) -+ return uav_type_names[type->sampler_dim]; -+ -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_PIXELSHADER: -+ case HLSL_TYPE_VERTEXSHADER: -+ case HLSL_TYPE_RENDERTARGETVIEW: -+ case HLSL_TYPE_DEPTHSTENCILVIEW: -+ return object_type_names[type->base_type]; -+ default: -+ return type->name; -+ } -+} -+ -+static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ uint32_t name_offset, offset, size, stride, numeric_desc; -+ uint32_t elements_count = 0; -+ const char *name; -+ static const uint32_t variable_type[] = -+ { -+ [HLSL_CLASS_SCALAR] = 1, -+ [HLSL_CLASS_VECTOR] = 1, -+ [HLSL_CLASS_MATRIX] = 1, -+ [HLSL_CLASS_OBJECT] = 2, -+ [HLSL_CLASS_STRUCT] = 3, -+ }; -+ struct hlsl_ctx *ctx = fx->ctx; -+ - /* Resolve arrays to element type and number of elements. 
*/ - if (type->class == HLSL_CLASS_ARRAY) - { -@@ -367,12 +416,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - type = hlsl_get_multiarray_element_type(type); - } - -- if (type->base_type == HLSL_TYPE_TEXTURE) -- name = texture_type_names[type->sampler_dim]; -- else if (type->base_type == HLSL_TYPE_UAV) -- name = uav_type_names[type->sampler_dim]; -- else -- name = type->name; -+ name = get_fx_4_type_name(type); - - name_offset = write_string(name, fx); - offset = put_u32_unaligned(buffer, name_offset); -@@ -387,8 +431,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - put_u32_unaligned(buffer, variable_type[type->class]); - break; - default: -- FIXME("Writing type class %u is not implemented.\n", type->class); -- set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -+ hlsl_fixme(ctx, &ctx->location, "Writing type class %u is not implemented.", type->class); - return 0; - } - -@@ -426,6 +469,8 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - { - static const uint32_t object_type[] = - { -+ [HLSL_TYPE_PIXELSHADER] = 5, -+ [HLSL_TYPE_VERTEXSHADER] = 6, - [HLSL_TYPE_RENDERTARGETVIEW] = 19, - [HLSL_TYPE_DEPTHSTENCILVIEW] = 20, - }; -@@ -456,7 +501,9 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - switch (type->base_type) - { - case HLSL_TYPE_DEPTHSTENCILVIEW: -+ case HLSL_TYPE_PIXELSHADER: - case HLSL_TYPE_RENDERTARGETVIEW: -+ case HLSL_TYPE_VERTEXSHADER: - put_u32_unaligned(buffer, object_type[type->base_type]); - break; - case HLSL_TYPE_TEXTURE: -@@ -466,8 +513,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - put_u32_unaligned(buffer, uav_type[type->sampler_dim]); - break; - default: -- FIXME("Object type %u is not supported.\n", type->base_type); -- set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -+ hlsl_fixme(ctx, &ctx->location, "Object type %u is not supported.", type->base_type); - 
return 0; - } - } -@@ -573,6 +619,73 @@ static uint32_t write_fx_2_string(const char *string, struct fx_write_context *f - return offset; - } - -+static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic, -+ struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ uint32_t semantic_offset, offset, elements_count = 0, name_offset; -+ struct hlsl_ctx *ctx = fx->ctx; -+ size_t i; -+ -+ /* Resolve arrays to element type and number of elements. */ -+ if (type->class == HLSL_CLASS_ARRAY) -+ { -+ elements_count = hlsl_get_multiarray_size(type); -+ type = hlsl_get_multiarray_element_type(type); -+ } -+ -+ name_offset = write_string(name, fx); -+ semantic_offset = write_string(semantic->name, fx); -+ -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_VOID: -+ break; -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing parameter type %u is not implemented.", -+ type->base_type); -+ return 0; -+ }; -+ -+ offset = put_u32(buffer, hlsl_sm1_base_type(type)); -+ put_u32(buffer, hlsl_sm1_class(type)); -+ put_u32(buffer, name_offset); -+ put_u32(buffer, semantic_offset); -+ put_u32(buffer, elements_count); -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_VECTOR: -+ put_u32(buffer, type->dimx); -+ put_u32(buffer, type->dimy); -+ break; -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_MATRIX: -+ put_u32(buffer, type->dimy); -+ put_u32(buffer, type->dimx); -+ break; -+ case HLSL_CLASS_STRUCT: -+ put_u32(buffer, type->e.record.field_count); -+ break; -+ default: -+ ; -+ } -+ -+ if (type->class == HLSL_CLASS_STRUCT) -+ { -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ const struct hlsl_struct_field *field = &type->e.record.fields[i]; -+ write_fx_2_parameter(field->type, field->name, &field->semantic, fx); -+ } -+ } -+ -+ return offset; -+} -+ - static void write_fx_2_technique(struct hlsl_ir_var 
*var, struct fx_write_context *fx) - { - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -@@ -595,6 +708,88 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex - set_u32(buffer, count_offset, count); - } - -+static uint32_t get_fx_2_type_size(const struct hlsl_type *type) -+{ -+ uint32_t size = 0, elements_count; -+ size_t i; -+ -+ if (type->class == HLSL_CLASS_ARRAY) -+ { -+ elements_count = hlsl_get_multiarray_size(type); -+ type = hlsl_get_multiarray_element_type(type); -+ return get_fx_2_type_size(type) * elements_count; -+ } -+ else if (type->class == HLSL_CLASS_STRUCT) -+ { -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ const struct hlsl_struct_field *field = &type->e.record.fields[i]; -+ size += get_fx_2_type_size(field->type); -+ } -+ -+ return size; -+ } -+ -+ return type->dimx * type->dimy * sizeof(float); -+} -+ -+static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ const struct hlsl_type *type = var->data_type; -+ uint32_t i, offset, size, elements_count = 1; -+ -+ size = get_fx_2_type_size(type); -+ -+ if (type->class == HLSL_CLASS_ARRAY) -+ { -+ elements_count = hlsl_get_multiarray_size(type); -+ type = hlsl_get_multiarray_element_type(type); -+ } -+ -+ if (type->class == HLSL_CLASS_OBJECT) -+ { -+ /* Objects are given sequential ids. 
*/ -+ offset = put_u32(buffer, fx->object_variable_count++); -+ for (i = 1; i < elements_count; ++i) -+ put_u32(buffer, fx->object_variable_count++); -+ } -+ else -+ { -+ /* FIXME: write actual initial value */ -+ offset = put_u32(buffer, 0); -+ -+ for (i = 1; i < size / sizeof(uint32_t); ++i) -+ put_u32(buffer, 0); -+ } -+ -+ return offset; -+} -+ -+static void write_fx_2_parameters(struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t desc_offset, value_offset; -+ struct hlsl_ctx *ctx = fx->ctx; -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); -+ value_offset = write_fx_2_initial_value(var, fx); -+ -+ put_u32(buffer, desc_offset); /* Parameter description */ -+ put_u32(buffer, value_offset); /* Value */ -+ put_u32(buffer, 0); /* Flags */ -+ -+ put_u32(buffer, 0); /* Annotations count */ -+ if (has_annotations(var)) -+ hlsl_fixme(ctx, &ctx->location, "Writing annotations for parameters is not implemented."); -+ -+ ++fx->parameter_count; -+ } -+} -+ - static const struct fx_write_context_ops fx_2_ops = - { - .write_string = write_fx_2_string, -@@ -604,9 +799,9 @@ static const struct fx_write_context_ops fx_2_ops = - - static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - { -+ uint32_t offset, size, technique_count, parameter_count; - struct vkd3d_bytecode_buffer buffer = { 0 }; - struct vkd3d_bytecode_buffer *structured; -- uint32_t offset, size, technique_count; - struct fx_write_context fx; - - fx_write_context_init(ctx, &fx_2_ops, &fx); -@@ -618,12 +813,13 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, 0xfeff0901); /* Version. 
*/ - offset = put_u32(&buffer, 0); - -- put_u32(structured, 0); /* Parameter count */ -+ parameter_count = put_u32(structured, 0); /* Parameter count */ - technique_count = put_u32(structured, 0); - put_u32(structured, 0); /* Unknown */ - put_u32(structured, 0); /* Object count */ - -- /* TODO: parameters */ -+ write_fx_2_parameters(&fx); -+ set_u32(structured, parameter_count, fx.parameter_count); - - write_techniques(ctx->globals, &fx); - set_u32(structured, technique_count, fx.technique_count); -@@ -643,24 +839,27 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - vkd3d_free(fx.unstructured.data); - vkd3d_free(fx.structured.data); - -- if (!fx.status) -+ if (!fx.technique_count) -+ hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE, "No techniques found."); -+ -+ if (fx.status < 0) -+ ctx->result = fx.status; -+ -+ if (!ctx->result) - { - out->code = buffer.data; - out->size = buffer.size; - } - -- if (fx.status < 0) -- ctx->result = fx.status; -- - return fx_write_context_cleanup(&fx); - } - - static const struct fx_write_context_ops fx_4_ops = - { - .write_string = write_fx_4_string, -- .write_type = write_fx_4_type, - .write_technique = write_fx_4_technique, - .write_pass = write_fx_4_pass, -+ .are_child_effects_supported = true, - }; - - static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) -@@ -672,6 +871,7 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write - { - HAS_EXPLICIT_BIND_POINT = 0x4, - }; -+ struct hlsl_ctx *ctx = fx->ctx; - - /* Explicit bind point. 
*/ - if (var->reg_reservation.reg_type) -@@ -690,14 +890,18 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write - put_u32(buffer, flags); /* Flags */ - - put_u32(buffer, 0); /* Annotations count */ -- /* FIXME: write annotations */ -+ if (has_annotations(var)) -+ hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); - } - - static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) - { -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); -+ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); - struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t semantic_offset, bind_point = ~0u; -- uint32_t name_offset, type_offset; -+ uint32_t name_offset, type_offset, i; -+ struct hlsl_ctx *ctx = fx->ctx; - - if (var->reg_reservation.reg_type) - bind_point = var->reg_reservation.reg_index; -@@ -712,8 +916,36 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ - put_u32(buffer, bind_point); /* Explicit bind point */ - -+ if (fx->child_effect && var->storage_modifiers & HLSL_STORAGE_SHARED) -+ { -+ ++fx->shared_object_count; -+ return; -+ } -+ -+ /* Initializer */ -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_TEXTURE: -+ case HLSL_TYPE_UAV: -+ case HLSL_TYPE_RENDERTARGETVIEW: -+ break; -+ case HLSL_TYPE_PIXELSHADER: -+ case HLSL_TYPE_VERTEXSHADER: -+ /* FIXME: write shader blobs, once parser support works. 
*/ -+ for (i = 0; i < elements_count; ++i) -+ put_u32(buffer, 0); -+ ++fx->shader_variable_count; -+ break; -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", -+ type->base_type); -+ } -+ - put_u32(buffer, 0); /* Annotations count */ -- /* FIXME: write annotations */ -+ if (has_annotations(var)) -+ hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); -+ -+ ++fx->object_variable_count; - } - - static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx) -@@ -734,7 +966,8 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx - bind_point = b->reservation.reg_index; - if (b->type == HLSL_BUFFER_TEXTURE) - flags |= IS_TBUFFER; -- /* FIXME: set 'single' flag for fx_5_0 */ -+ if (ctx->profile->major_version == 5 && b->modifiers & HLSL_MODIFIER_SINGLE) -+ flags |= IS_SINGLE; - - name_offset = write_string(b->name, fx); - -@@ -768,12 +1001,6 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx - static void write_buffers(struct fx_write_context *fx) - { - struct hlsl_buffer *buffer; -- struct hlsl_block block; -- -- hlsl_block_init(&block); -- hlsl_prepend_global_uniform_copy(fx->ctx, &block); -- hlsl_block_init(&block); -- hlsl_calculate_buffer_offsets(fx->ctx); - - LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) - { -@@ -806,21 +1033,23 @@ static bool is_object_variable(const struct hlsl_ir_var *var) - } - } - --static void write_objects(struct fx_write_context *fx) -+static void write_objects(struct fx_write_context *fx, bool shared) - { - struct hlsl_ir_var *var; -- uint32_t count = 0; -+ -+ if (shared && !fx->child_effect) -+ return; - - LIST_FOR_EACH_ENTRY(var, &fx->ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (!is_object_variable(var)) - continue; - -+ if (fx->child_effect && (shared != !!(var->storage_modifiers & HLSL_STORAGE_SHARED))) -+ 
continue; -+ - write_fx_4_object_variable(var, fx); -- ++count; - } -- -- fx->object_variable_count += count; - } - - static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) -@@ -834,9 +1063,9 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&fx.unstructured, 0); /* Empty string placeholder. */ - - write_buffers(&fx); -- write_objects(&fx); -+ write_objects(&fx, false); - /* TODO: shared buffers */ -- /* TODO: shared objects */ -+ write_objects(&fx, true); - - write_techniques(ctx->globals, &fx); - -@@ -846,7 +1075,7 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, fx.object_variable_count); /* Object variable count. */ - put_u32(&buffer, 0); /* Pool buffer count. */ - put_u32(&buffer, 0); /* Pool variable count. */ -- put_u32(&buffer, 0); /* Pool object count. */ -+ put_u32(&buffer, fx.shared_object_count); /* Shared object count. */ - put_u32(&buffer, fx.technique_count); - size_offset = put_u32(&buffer, 0); /* Unstructured size. */ - put_u32(&buffer, 0); /* String count. */ -@@ -857,7 +1086,7 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, 0); /* Sampler state count. */ - put_u32(&buffer, 0); /* Rendertarget view count. */ - put_u32(&buffer, 0); /* Depth stencil view count. */ -- put_u32(&buffer, 0); /* Shader count. */ -+ put_u32(&buffer, fx.shader_variable_count); /* Shader count. */ - put_u32(&buffer, 0); /* Inline shader count. 
*/ - - set_u32(&buffer, size_offset, fx.unstructured.size); -@@ -870,15 +1099,15 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - - set_status(&fx, buffer.status); - -- if (!fx.status) -+ if (fx.status < 0) -+ ctx->result = fx.status; -+ -+ if (!ctx->result) - { - out->code = buffer.data; - out->size = buffer.size; - } - -- if (fx.status < 0) -- ctx->result = fx.status; -- - return fx_write_context_cleanup(&fx); - } - -@@ -893,7 +1122,7 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&fx.unstructured, 0); /* Empty string placeholder. */ - - write_buffers(&fx); -- write_objects(&fx); -+ write_objects(&fx, false); - /* TODO: interface variables */ - - write_groups(&fx); -@@ -915,7 +1144,7 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, 0); /* Sampler state count. */ - put_u32(&buffer, 0); /* Rendertarget view count. */ - put_u32(&buffer, 0); /* Depth stencil view count. */ -- put_u32(&buffer, 0); /* Shader count. */ -+ put_u32(&buffer, fx.shader_variable_count); /* Shader count. */ - put_u32(&buffer, 0); /* Inline shader count. */ - put_u32(&buffer, fx.group_count); /* Group count. */ - put_u32(&buffer, 0); /* UAV count. 
*/ -@@ -933,15 +1162,15 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - - set_status(&fx, buffer.status); - -- if (!fx.status) -+ if (fx.status < 0) -+ ctx->result = fx.status; -+ -+ if (!ctx->result) - { - out->code = buffer.data; - out->size = buffer.size; - } - -- if (fx.status < 0) -- ctx->result = fx.status; -- - return fx_write_context_cleanup(&fx); - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index bdd03c1e72a..3977852a48d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -20,29 +20,14 @@ - - struct vkd3d_glsl_generator - { -- struct vkd3d_shader_version version; -+ struct vsir_program *program; - struct vkd3d_string_buffer buffer; - struct vkd3d_shader_location location; - struct vkd3d_shader_message_context *message_context; -+ unsigned int indent; - bool failed; - }; - --struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, -- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) --{ -- struct vkd3d_glsl_generator *generator; -- -- if (!(generator = vkd3d_malloc(sizeof(*generator)))) -- return NULL; -- -- memset(generator, 0, sizeof(*generator)); -- generator->version = *version; -- vkd3d_string_buffer_init(&generator->buffer); -- generator->location = *location; -- generator->message_context = message_context; -- return generator; --} -- - static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( - struct vkd3d_glsl_generator *generator, - enum vkd3d_shader_error error, const char *fmt, ...) 
-@@ -55,10 +40,23 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( - generator->failed = true; - } - -+static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) -+{ -+ vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); -+} -+ -+static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ shader_glsl_print_indent(&gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->handler_idx); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); -+} -+ - static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, - const struct vkd3d_shader_instruction *ins) - { -- const struct vkd3d_shader_version *version = &generator->version; -+ const struct vkd3d_shader_version *version = &generator->program->shader_version; - - /* - * TODO: Implement in_subroutine -@@ -66,6 +64,7 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, - */ - if (version->major >= 4) - { -+ shader_glsl_print_indent(&generator->buffer, generator->indent); - vkd3d_string_buffer_printf(&generator->buffer, "return;\n"); - } - } -@@ -73,6 +72,8 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, - static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, - const struct vkd3d_shader_instruction *instruction) - { -+ generator->location = instruction->location; -+ - switch (instruction->handler_idx) - { - case VKD3DSIH_DCL_INPUT: -@@ -83,34 +84,36 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator - shader_glsl_ret(generator, instruction); - break; - default: -- vkd3d_glsl_compiler_error(generator, -- VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Unhandled instruction %#x", instruction->handler_idx); -+ shader_glsl_unhandled(generator, instruction); - break; - } - } - --int 
vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, -- struct vsir_program *program, struct vkd3d_shader_code *out) -+static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, struct vkd3d_shader_code *out) - { -+ const struct vkd3d_shader_instruction_array *instructions = &generator->program->instructions; - unsigned int i; - void *code; - -+ ERR("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ - vkd3d_string_buffer_printf(&generator->buffer, "#version 440\n\n"); - vkd3d_string_buffer_printf(&generator->buffer, "void main()\n{\n"); - -- generator->location.column = 0; -- for (i = 0; i < program->instructions.count; ++i) -+ ++generator->indent; -+ for (i = 0; i < instructions->count; ++i) - { -- generator->location.line = i + 1; -- vkd3d_glsl_handle_instruction(generator, &program->instructions.elements[i]); -+ vkd3d_glsl_handle_instruction(generator, &instructions->elements[i]); - } - -+ vkd3d_string_buffer_printf(&generator->buffer, "}\n"); -+ -+ if (TRACE_ON()) -+ vkd3d_string_buffer_trace(&generator->buffer); -+ - if (generator->failed) - return VKD3D_ERROR_INVALID_SHADER; - -- vkd3d_string_buffer_printf(&generator->buffer, "}\n"); -- - if ((code = vkd3d_malloc(generator->buffer.buffer_size))) - { - memcpy(code, generator->buffer.buffer, generator->buffer.content_size); -@@ -122,8 +125,29 @@ int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, - return VKD3D_OK; - } - --void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator) -+static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) - { -- vkd3d_string_buffer_cleanup(&generator->buffer); -- vkd3d_free(generator); -+ vkd3d_string_buffer_cleanup(&gen->buffer); -+} -+ -+static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, -+ struct vsir_program *program, struct vkd3d_shader_message_context *message_context) -+{ -+ memset(gen, 0, sizeof(*gen)); -+ 
gen->program = program; -+ vkd3d_string_buffer_init(&gen->buffer); -+ gen->message_context = message_context; -+} -+ -+int glsl_compile(struct vsir_program *program, struct vkd3d_shader_code *out, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_glsl_generator generator; -+ int ret; -+ -+ vkd3d_glsl_generator_init(&generator, program, message_context); -+ ret = vkd3d_glsl_generator_generate(&generator, out); -+ vkd3d_glsl_generator_cleanup(&generator); -+ -+ return ret; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 538f083df9c..a82334e58fd 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -784,7 +784,9 @@ static const char * get_case_insensitive_typename(const char *name) - "dword", - "float", - "matrix", -+ "pixelshader", - "vector", -+ "vertexshader", - }; - unsigned int i; - -@@ -1346,6 +1348,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp - return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); - } - -+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, -+ struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; -+ -+ assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); -+ assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); -+ return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); -+} -+ - struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) - { -@@ -2018,7 +2030,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, - } - - struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, -- const struct 
hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) -+ uint32_t modifiers, const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) - { - struct hlsl_buffer *buffer; - -@@ -2026,6 +2038,7 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type - return NULL; - buffer->type = type; - buffer->name = name; -+ buffer->modifiers = modifiers; - if (reservation) - buffer->reservation = *reservation; - buffer->loc = *loc; -@@ -2611,6 +2624,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP2_MUL] = "*", - [HLSL_OP2_NEQUAL] = "!=", - [HLSL_OP2_RSHIFT] = ">>", -+ [HLSL_OP2_SLT] = "slt", - - [HLSL_OP3_CMP] = "cmp", - [HLSL_OP3_DP2ADD] = "dp2add", -@@ -3395,8 +3409,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - {"pass", HLSL_CLASS_OBJECT, HLSL_TYPE_PASS, 1, 1}, - {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, - {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, -- {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, -- {"VERTEXSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, -+ {"pixelshader", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, -+ {"vertexshader", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, - {"RenderTargetView",HLSL_CLASS_OBJECT, HLSL_TYPE_RENDERTARGETVIEW, 1, 1}, - {"DepthStencilView",HLSL_CLASS_OBJECT, HLSL_TYPE_DEPTHSTENCILVIEW, 1, 1}, - }; -@@ -3571,10 +3585,10 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil - list_init(&ctx->buffers); - - if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, -- hlsl_strdup(ctx, "$Globals"), NULL, &ctx->location))) -+ hlsl_strdup(ctx, "$Globals"), 0, NULL, &ctx->location))) - return false; - if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, -- hlsl_strdup(ctx, "$Params"), NULL, &ctx->location))) -+ hlsl_strdup(ctx, "$Params"), 0, NULL, &ctx->location))) - return false; - ctx->cur_buffer = 
ctx->globals_buffer; - -@@ -3593,6 +3607,10 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil - { - ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; - } -+ else if (option->name == VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT) -+ { -+ ctx->child_effect = !!option->value; -+ } - } - - return true; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index df0a53b20de..561782efbf8 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -374,6 +374,7 @@ struct hlsl_attribute - #define HLSL_STORAGE_CENTROID 0x00004000 - #define HLSL_STORAGE_NOPERSPECTIVE 0x00008000 - #define HLSL_STORAGE_LINEAR 0x00010000 -+#define HLSL_MODIFIER_SINGLE 0x00020000 - - #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ - HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -@@ -593,6 +594,8 @@ enum hlsl_ir_expr_op - HLSL_OP2_MUL, - HLSL_OP2_NEQUAL, - HLSL_OP2_RSHIFT, -+ /* SLT(a, b) retrieves 1.0 if (a < b), else 0.0. Only used for SM1-SM3 target vertex shaders. */ -+ HLSL_OP2_SLT, - - /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, - * then adds c. */ -@@ -798,6 +801,7 @@ struct hlsl_buffer - struct vkd3d_shader_location loc; - enum hlsl_buffer_type type; - const char *name; -+ uint32_t modifiers; - /* Register reserved for this buffer, if any. - * If provided, it should be of type 'b' if type is HLSL_BUFFER_CONSTANT and 't' if type is - * HLSL_BUFFER_TEXTURE. 
*/ -@@ -920,6 +924,7 @@ struct hlsl_ctx - uint32_t found_numthreads : 1; - - bool semantic_compat_mapping; -+ bool child_effect; - }; - - struct hlsl_resource_load_params -@@ -1222,7 +1227,7 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp - struct hlsl_ir_node *arg2); - struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); - struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, -- const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); -+ uint32_t modifiers, const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, - const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, -@@ -1243,6 +1248,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond - struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, - enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, -+ struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); - - void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); - -@@ -1356,6 +1363,8 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), - struct hlsl_block *block, void *context); - -+D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); -+D3DXPARAMETER_TYPE 
hlsl_sm1_base_type(const struct hlsl_type *type); - bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); - bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 558506db108..600e2cf2c6a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -76,6 +76,7 @@ case {return KW_CASE; } - cbuffer {return KW_CBUFFER; } - centroid {return KW_CENTROID; } - column_major {return KW_COLUMN_MAJOR; } -+ComputeShader {return KW_COMPUTESHADER; } - compile {return KW_COMPILE; } - const {return KW_CONST; } - continue {return KW_CONTINUE; } -@@ -83,6 +84,7 @@ DepthStencilState {return KW_DEPTHSTENCILSTATE; } - DepthStencilView {return KW_DEPTHSTENCILVIEW; } - default {return KW_DEFAULT; } - discard {return KW_DISCARD; } -+DomainShader {return KW_DOMAINSHADER; } - do {return KW_DO; } - double {return KW_DOUBLE; } - else {return KW_ELSE; } -@@ -92,6 +94,7 @@ for {return KW_FOR; } - fxgroup {return KW_FXGROUP; } - GeometryShader {return KW_GEOMETRYSHADER; } - groupshared {return KW_GROUPSHARED; } -+HullShader {return KW_HULLSHADER; } - if {return KW_IF; } - in {return KW_IN; } - inline {return KW_INLINE; } -@@ -105,7 +108,7 @@ out {return KW_OUT; } - packoffset {return KW_PACKOFFSET; } - pass {return KW_PASS; } - PixelShader {return KW_PIXELSHADER; } --precise {return KW_PRECISE; } -+pixelshader {return KW_PIXELSHADER; } - RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } - RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } - RasterizerOrderedTexture1D {return KW_RASTERIZERORDEREDTEXTURE1D; } -@@ -163,6 +166,7 @@ typedef {return KW_TYPEDEF; } - uniform {return KW_UNIFORM; } - vector {return KW_VECTOR; } - VertexShader {return 
KW_VERTEXSHADER; } -+vertexshader {return KW_VERTEXSHADER; } - void {return KW_VOID; } - volatile {return KW_VOLATILE; } - while {return KW_WHILE; } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index cd05fd008a6..ec8b3d22af2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -3155,6 +3155,94 @@ static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); - } - -+static bool intrinsic_determinant(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ static const char determinant2x2[] = -+ "%s determinant(%s2x2 m)\n" -+ "{\n" -+ " return m._11 * m._22 - m._12 * m._21;\n" -+ "}"; -+ static const char determinant3x3[] = -+ "%s determinant(%s3x3 m)\n" -+ "{\n" -+ " %s2x2 m1 = { m._22, m._23, m._32, m._33 };\n" -+ " %s2x2 m2 = { m._21, m._23, m._31, m._33 };\n" -+ " %s2x2 m3 = { m._21, m._22, m._31, m._32 };\n" -+ " %s3 v1 = { m._11, -m._12, m._13 };\n" -+ " %s3 v2 = { determinant(m1), determinant(m2), determinant(m3) };\n" -+ " return dot(v1, v2);\n" -+ "}"; -+ static const char determinant4x4[] = -+ "%s determinant(%s4x4 m)\n" -+ "{\n" -+ " %s3x3 m1 = { m._22, m._23, m._24, m._32, m._33, m._34, m._42, m._43, m._44 };\n" -+ " %s3x3 m2 = { m._21, m._23, m._24, m._31, m._33, m._34, m._41, m._43, m._44 };\n" -+ " %s3x3 m3 = { m._21, m._22, m._24, m._31, m._32, m._34, m._41, m._42, m._44 };\n" -+ " %s3x3 m4 = { m._21, m._22, m._23, m._31, m._32, m._33, m._41, m._42, m._43 };\n" -+ " %s4 v1 = { m._11, -m._12, m._13, -m._14 };\n" -+ " %s4 v2 = { determinant(m1), determinant(m2), determinant(m3), determinant(m4) };\n" -+ " return dot(v1, v2);\n" -+ "}"; -+ static const char *templates[] = -+ { -+ [2] = determinant2x2, -+ [3] = determinant3x3, -+ [4] = determinant4x4, -+ }; -+ -+ struct hlsl_ir_node *arg = params->args[0]; -+ const struct hlsl_type 
*type = arg->data_type; -+ struct hlsl_ir_function_decl *func; -+ const char *typename, *template; -+ unsigned int dim; -+ char *body; -+ -+ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_MATRIX) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); -+ return false; -+ } -+ -+ dim = min(type->dimx, type->dimy); -+ if (dim == 1) -+ { -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) -+ return false; -+ return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); -+ } -+ -+ typename = type->base_type == HLSL_TYPE_HALF ? "half" : "float"; -+ template = templates[dim]; -+ -+ switch (dim) -+ { -+ case 2: -+ body = hlsl_sprintf_alloc(ctx, template, typename, typename); -+ break; -+ case 3: -+ body = hlsl_sprintf_alloc(ctx, template, typename, typename, typename, -+ typename, typename, typename, typename); -+ break; -+ case 4: -+ body = hlsl_sprintf_alloc(ctx, template, typename, typename, typename, -+ typename, typename, typename, typename, typename); -+ break; -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (!body) -+ return false; -+ -+ func = hlsl_compile_internal_function(ctx, "determinant", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return add_user_call(ctx, func, params, loc); -+} -+ - static bool intrinsic_distance(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4138,6 +4226,7 @@ intrinsic_functions[] = - {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, - {"ddy_fine", 1, true, intrinsic_ddy_fine}, - {"degrees", 1, true, intrinsic_degrees}, -+ {"determinant", 1, true, intrinsic_determinant}, - {"distance", 2, true, intrinsic_distance}, - {"dot", 2, true, intrinsic_dot}, - {"exp", 1, true, intrinsic_exp}, -@@ -5243,6 +5332,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_CENTROID - %token KW_COLUMN_MAJOR - %token KW_COMPILE -+%token KW_COMPUTESHADER - 
%token KW_CONST - %token KW_CONTINUE - %token KW_DEFAULT -@@ -5250,6 +5340,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_DEPTHSTENCILVIEW - %token KW_DISCARD - %token KW_DO -+%token KW_DOMAINSHADER - %token KW_DOUBLE - %token KW_ELSE - %token KW_EXTERN -@@ -5258,6 +5349,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_FXGROUP - %token KW_GEOMETRYSHADER - %token KW_GROUPSHARED -+%token KW_HULLSHADER - %token KW_IF - %token KW_IN - %token KW_INLINE -@@ -5271,7 +5363,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_PACKOFFSET - %token KW_PASS - %token KW_PIXELSHADER --%token KW_PRECISE - %token KW_RASTERIZERORDEREDBUFFER - %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER - %token KW_RASTERIZERORDEREDTEXTURE1D -@@ -5535,10 +5626,6 @@ technique10: - struct hlsl_scope *scope = ctx->cur_scope; - hlsl_pop_scope(ctx); - -- if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT && ctx->profile->major_version == 2) -- hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "The 'technique10' keyword is invalid for this profile."); -- - if (!add_technique(ctx, $2, scope, $3, "technique10", &@1)) - YYABORT; - } -@@ -5580,12 +5667,12 @@ effect_group: - } - - buffer_declaration: -- buffer_type any_identifier colon_attribute -+ var_modifiers buffer_type any_identifier colon_attribute - { -- if ($3.semantic.name) -- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); -+ if ($4.semantic.name) -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); - -- if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, &@2))) -+ if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $2, $3, $1, &$4.reg_reservation, &@3))) - YYABORT; - } - -@@ -6454,6 +6541,14 @@ type_no_void: - { - $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, 
true); - } -+ | KW_VERTEXSHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "VertexShader", true, true); -+ } -+ | KW_PIXELSHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); -+ } - - type: - type_no_void -@@ -6727,10 +6822,6 @@ var_modifiers: - { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOPERSPECTIVE, &@1); - } -- | KW_PRECISE var_modifiers -- { -- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); -- } - | KW_SHARED var_modifiers - { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); -@@ -6779,7 +6870,16 @@ var_modifiers: - { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); - } -- -+ | var_identifier var_modifiers -+ { -+ if (!strcmp($1, "precise")) -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); -+ else if (!strcmp($1, "single")) -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SINGLE, &@1); -+ else -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER, -+ "Unknown modifier %s.", debugstr_a($1)); -+ } - - complex_initializer: - initializer_expr -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 307f86f55b7..ff349ab49ef 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -2647,10 +2647,11 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) - return false; - } - --/* Append a FLOOR before a CAST to int or uint (which is written as a mere MOV). */ -+/* Turn CAST to int or uint into FLOOR + REINTERPRET (which is written as a mere MOV). 
*/ - static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -- struct hlsl_ir_node *arg, *floor, *cast2; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; -+ struct hlsl_ir_node *arg, *floor, *res; - struct hlsl_ir_expr *expr; - - if (instr->type != HLSL_IR_EXPR) -@@ -2665,17 +2666,15 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - if (arg->data_type->base_type != HLSL_TYPE_FLOAT && arg->data_type->base_type != HLSL_TYPE_HALF) - return false; - -- /* Check that the argument is not already a FLOOR */ -- if (arg->type == HLSL_IR_EXPR && hlsl_ir_expr(arg)->op == HLSL_OP1_FLOOR) -- return false; -- - if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc))) - return false; - hlsl_block_add_instr(block, floor); - -- if (!(cast2 = hlsl_new_cast(ctx, floor, instr->data_type, &instr->loc))) -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = floor; -+ if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) - return false; -- hlsl_block_add_instr(block, cast2); -+ hlsl_block_add_instr(block, res); - - return true; - } -@@ -2903,7 +2902,7 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct - return true; - } - --/* Use 'movc' for the ternary operator. */ -+/* Use movc/cmp for the ternary operator. 
*/ - static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement; -@@ -2929,7 +2928,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - return false; - } - -- if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ if (ctx->profile->major_version < 4) - { - struct hlsl_ir_node *abs, *neg; - -@@ -2947,11 +2946,6 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) - return false; - } -- else if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) -- { -- hlsl_fixme(ctx, &instr->loc, "Ternary operator is not implemented for %s profile.", ctx->profile->name); -- return false; -- } - else - { - if (cond->data_type->base_type == HLSL_TYPE_FLOAT) -@@ -2981,6 +2975,261 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - return true; - } - -+static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, -+ struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *arg1, *arg1_cast, *arg2, *arg2_cast, *slt, *res, *ret; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; -+ struct hlsl_type *float_type; -+ struct hlsl_ir_expr *expr; -+ bool negate = false; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP2_EQUAL && expr->op != HLSL_OP2_NEQUAL && expr->op != HLSL_OP2_LESS -+ && expr->op != HLSL_OP2_GEQUAL) -+ return false; -+ -+ arg1 = expr->operands[0].node; -+ arg2 = expr->operands[1].node; -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ -+ if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg1_cast); -+ -+ if 
(!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg2_cast); -+ -+ switch (expr->op) -+ { -+ case HLSL_OP2_EQUAL: -+ case HLSL_OP2_NEQUAL: -+ { -+ struct hlsl_ir_node *neg, *sub, *abs, *abs_neg; -+ -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg); -+ -+ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) -+ return false; -+ hlsl_block_add_instr(block, sub); -+ -+ if (ctx->profile->major_version >= 3) -+ { -+ if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, sub, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, abs); -+ } -+ else -+ { -+ /* Use MUL as a precarious ABS. */ -+ if (!(abs = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub))) -+ return false; -+ hlsl_block_add_instr(block, abs); -+ } -+ -+ if (!(abs_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, abs_neg); -+ -+ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, abs_neg, abs))) -+ return false; -+ hlsl_block_add_instr(block, slt); -+ -+ negate = (expr->op == HLSL_OP2_EQUAL); -+ break; -+ } -+ -+ case HLSL_OP2_GEQUAL: -+ case HLSL_OP2_LESS: -+ { -+ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg1_cast, arg2_cast))) -+ return false; -+ hlsl_block_add_instr(block, slt); -+ -+ negate = (expr->op == HLSL_OP2_GEQUAL); -+ break; -+ } -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (negate) -+ { -+ struct hlsl_constant_value one_value; -+ struct hlsl_ir_node *one, *slt_neg; -+ -+ one_value.u[0].f = 1.0; -+ one_value.u[1].f = 1.0; -+ one_value.u[2].f = 1.0; -+ one_value.u[3].f = 1.0; -+ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, one); -+ -+ if (!(slt_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, slt_neg); -+ -+ 
if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, slt_neg))) -+ return false; -+ hlsl_block_add_instr(block, res); -+ } -+ else -+ { -+ res = slt; -+ } -+ -+ /* We need a REINTERPRET so that the HLSL IR code is valid. SLT and its arguments must be FLOAT, -+ * and casts to BOOL have already been lowered to "!= 0". */ -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = res; -+ if (!(ret = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, ret); -+ -+ return true; -+} -+ -+/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to -+ * CMP instructions (only available in pixel shaders). -+ * Based on the following equivalence: -+ * SLT(x, y) -+ * = (x < y) ? 1.0 : 0.0 -+ * = ((x - y) >= 0) ? 0.0 : 1.0 -+ * = CMP(x - y, 0.0, 1.0) -+ */ -+static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp; -+ struct hlsl_constant_value zero_value, one_value; -+ struct hlsl_type *float_type; -+ struct hlsl_ir_expr *expr; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP2_SLT) -+ return false; -+ -+ arg1 = expr->operands[0].node; -+ arg2 = expr->operands[1].node; -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ -+ if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg1_cast); -+ -+ if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg2_cast); -+ -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg); -+ -+ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) -+ return false; -+ 
hlsl_block_add_instr(block, sub); -+ -+ memset(&zero_value, 0, sizeof(zero_value)); -+ if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, zero); -+ -+ one_value.u[0].f = 1.0; -+ one_value.u[1].f = 1.0; -+ one_value.u[2].f = 1.0; -+ one_value.u[3].f = 1.0; -+ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, one); -+ -+ if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one))) -+ return false; -+ hlsl_block_add_instr(block, cmp); -+ -+ return true; -+} -+ -+/* Intended to be used for SM1-SM3, lowers CMP instructions (only available in pixel shaders) to -+ * SLT instructions (only available in vertex shaders). -+ * Based on the following equivalence: -+ * CMP(x, y, z) -+ * = (x >= 0) ? y : z -+ * = z * ((x < 0) ? 1.0 : 0.0) + y * ((x < 0) ? 0.0 : 1.0) -+ * = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0)) -+ */ -+static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *args[3], *args_cast[3], *slt, *neg_slt, *sub, *zero, *one, *mul1, *mul2, *add; -+ struct hlsl_constant_value zero_value, one_value; -+ struct hlsl_type *float_type; -+ struct hlsl_ir_expr *expr; -+ unsigned int i; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP3_CMP) -+ return false; -+ -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ -+ for (i = 0; i < 3; ++i) -+ { -+ args[i] = expr->operands[i].node; -+ -+ if (!(args_cast[i] = hlsl_new_cast(ctx, args[i], float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, args_cast[i]); -+ } -+ -+ memset(&zero_value, 0, sizeof(zero_value)); -+ if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, zero); -+ -+ one_value.u[0].f = 1.0; -+ one_value.u[1].f = 1.0; 
-+ one_value.u[2].f = 1.0; -+ one_value.u[3].f = 1.0; -+ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, one); -+ -+ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, args_cast[0], zero))) -+ return false; -+ hlsl_block_add_instr(block, slt); -+ -+ if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[2], slt))) -+ return false; -+ hlsl_block_add_instr(block, mul1); -+ -+ if (!(neg_slt = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg_slt); -+ -+ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg_slt))) -+ return false; -+ hlsl_block_add_instr(block, sub); -+ -+ if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[1], sub))) -+ return false; -+ hlsl_block_add_instr(block, mul2); -+ -+ if (!(add = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul1, mul2))) -+ return false; -+ hlsl_block_add_instr(block, add); -+ -+ return true; -+} -+ - static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { - struct hlsl_type *type = instr->data_type, *arg_type; -@@ -3308,6 +3557,61 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return true; - } - -+static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_expr *expr; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op == HLSL_OP1_CAST || instr->data_type->base_type == HLSL_TYPE_FLOAT) -+ return false; -+ -+ switch (expr->op) -+ { -+ case HLSL_OP1_ABS: -+ case HLSL_OP1_NEG: -+ case HLSL_OP2_ADD: -+ case HLSL_OP2_DIV: -+ case HLSL_OP2_MAX: -+ case HLSL_OP2_MIN: -+ case HLSL_OP2_MUL: -+ { -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_ir_node *arg, *arg_cast, *float_expr, *ret; -+ struct hlsl_type *float_type; -+ unsigned int i; -+ 
-+ for (i = 0; i < HLSL_MAX_OPERANDS; ++i) -+ { -+ arg = expr->operands[i].node; -+ if (!arg) -+ continue; -+ -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); -+ if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg_cast); -+ -+ operands[i] = arg_cast; -+ } -+ -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ if (!(float_expr = hlsl_new_expr(ctx, expr->op, operands, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, float_expr); -+ -+ if (!(ret = hlsl_new_cast(ctx, float_expr, instr->data_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, ret); -+ -+ return true; -+ } -+ default: -+ return false; -+ } -+} -+ - static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; -@@ -5087,6 +5391,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - remove_unreachable_code(ctx, body); - hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL); - -+ if (profile-> major_version < 4) -+ { -+ lower_ir(ctx, lower_nonfloat_exprs, body); -+ /* Constants casted to float must be folded. 
*/ -+ hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -+ } -+ - lower_ir(ctx, lower_nonconstant_vector_derefs, body); - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_int_dot, body); -@@ -5108,6 +5419,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_round, body); - lower_ir(ctx, lower_ceil, body); - lower_ir(ctx, lower_floor, body); -+ lower_ir(ctx, lower_comparison_operators, body); -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ lower_ir(ctx, lower_slt, body); -+ else -+ lower_ir(ctx, lower_cmp, body); - } - - if (profile->major_version < 2) -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index f0bd85338c6..4f0226187af 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -32,6 +32,9 @@ void vsir_program_cleanup(struct vsir_program *program) - vkd3d_free((void *)program->block_names[i]); - vkd3d_free(program->block_names); - shader_instruction_array_destroy(&program->instructions); -+ shader_signature_cleanup(&program->input_signature); -+ shader_signature_cleanup(&program->output_signature); -+ shader_signature_cleanup(&program->patch_constant_signature); - } - - static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) -@@ -91,9 +94,8 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, - return true; - } - --static enum vkd3d_result instruction_array_lower_texkills(struct vkd3d_shader_parser *parser) -+static enum vkd3d_result vsir_program_lower_texkills(struct vsir_program *program) - { -- struct vsir_program *program = &parser->program; - struct vkd3d_shader_instruction_array *instructions = &program->instructions; - struct vkd3d_shader_instruction *texkill_ins, *ins; - unsigned int components_read = 3 + (program->shader_version.major >= 2); -@@ -227,10 +229,11 @@ static const struct vkd3d_shader_varying_map 
*find_varying_map( - return NULL; - } - --static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_compile_info *compile_info) -+static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) - { -- struct shader_signature *signature = &parser->shader_desc.output_signature; -+ const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; -+ struct shader_signature *signature = &program->output_signature; - const struct vkd3d_shader_varying_map_info *varying_map; - unsigned int i; - -@@ -252,7 +255,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars - * location with a different mask. */ - if (input_mask && input_mask != e->mask) - { -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " - "Output mask %#x does not match input mask %#x.", - e->mask, input_mask); -@@ -269,7 +272,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars - { - if (varying_map->varying_map[i].output_signature_index >= signature->element_count) - { -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " - "The next stage consumes varyings not written by this stage."); - return VKD3D_ERROR_NOT_IMPLEMENTED; -@@ -453,7 +456,7 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader - - void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) - { -- vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UINT, 1); -+ 
vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); - param->reg.dimension = VSIR_DIMENSION_NONE; - param->reg.idx[0].offset = label_id; - } -@@ -464,12 +467,24 @@ static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned - src->reg.idx[0].offset = idx; - } - -+static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -+ src->reg.idx[0].offset = idx; -+} -+ - static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) - { - vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); - dst->reg.idx[0].offset = idx; - } - -+static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+{ -+ vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -+ dst->reg.idx[0].offset = idx; -+} -+ - static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) - { - vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -@@ -1383,10 +1398,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - } - } - --static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parser *parser) -+static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program) - { -- struct io_normaliser normaliser = {parser->program.instructions}; -- struct vsir_program *program = &parser->program; -+ struct io_normaliser normaliser = {program->instructions}; - struct vkd3d_shader_instruction *ins; - bool has_control_point_phase; - unsigned int i, j; -@@ -1394,9 +1408,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse - normaliser.phase = VKD3DSIH_INVALID; - normaliser.shader_type = program->shader_version.type; - normaliser.major = program->shader_version.major; -- normaliser.input_signature = &parser->shader_desc.input_signature; -- normaliser.output_signature = 
&parser->shader_desc.output_signature; -- normaliser.patch_constant_signature = &parser->shader_desc.patch_constant_signature; -+ normaliser.input_signature = &program->input_signature; -+ normaliser.output_signature = &program->output_signature; -+ normaliser.patch_constant_signature = &program->patch_constant_signature; - - for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i) - { -@@ -1439,9 +1453,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse - } - } - -- if (!shader_signature_merge(&parser->shader_desc.input_signature, normaliser.input_range_map, false) -- || !shader_signature_merge(&parser->shader_desc.output_signature, normaliser.output_range_map, false) -- || !shader_signature_merge(&parser->shader_desc.patch_constant_signature, normaliser.pc_range_map, true)) -+ if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) -+ || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) -+ || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) - { - program->instructions = normaliser.instructions; - return VKD3D_ERROR_OUT_OF_MEMORY; -@@ -1668,19 +1682,20 @@ static void remove_dead_code(struct vsir_program *program) - } - } - --static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser *parser) -+static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) - { - unsigned int i; - -- for (i = 0; i < parser->program.instructions.count; ++i) -+ for (i = 0; i < program->instructions.count; ++i) - { -- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - struct vkd3d_shader_src_param *srcs; - - switch (ins->handler_idx) - { - case VKD3DSIH_TEX: -- if (!(srcs = 
shader_src_param_allocator_get(&parser->program.instructions.src_params, 3))) -+ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) - return VKD3D_ERROR_OUT_OF_MEMORY; - memset(srcs, 0, sizeof(*srcs) * 3); - -@@ -1723,7 +1738,7 @@ static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser - case VKD3DSIH_TEXREG2AR: - case VKD3DSIH_TEXREG2GB: - case VKD3DSIH_TEXREG2RGB: -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " - "Combined sampler instruction %#x.", ins->handler_idx); - return VKD3D_ERROR_NOT_IMPLEMENTED; -@@ -1789,10 +1804,10 @@ struct cf_flattener_info - - struct cf_flattener - { -- struct vkd3d_shader_parser *parser; -+ struct vsir_program *program; - - struct vkd3d_shader_location location; -- bool allocation_failed; -+ enum vkd3d_result status; - - struct vkd3d_shader_instruction *instructions; - size_t instruction_capacity; -@@ -1812,13 +1827,20 @@ struct cf_flattener - size_t control_flow_info_size; - }; - -+static void cf_flattener_set_error(struct cf_flattener *flattener, enum vkd3d_result error) -+{ -+ if (flattener->status != VKD3D_OK) -+ return; -+ flattener->status = error; -+} -+ - static struct vkd3d_shader_instruction *cf_flattener_require_space(struct cf_flattener *flattener, size_t count) - { - if (!vkd3d_array_reserve((void **)&flattener->instructions, &flattener->instruction_capacity, - flattener->instruction_count + count, sizeof(*flattener->instructions))) - { - ERR("Failed to allocate instructions.\n"); -- flattener->allocation_failed = true; -+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); - return NULL; - } - return &flattener->instructions[flattener->instruction_count]; -@@ -1850,9 +1872,9 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ - { - struct 
vkd3d_shader_src_param *params; - -- if (!(params = vsir_program_get_src_params(&flattener->parser->program, count))) -+ if (!(params = vsir_program_get_src_params(flattener->program, count))) - { -- flattener->allocation_failed = true; -+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); - return NULL; - } - ins->src = params; -@@ -1866,10 +1888,10 @@ static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int - - if (!(ins = cf_flattener_require_space(flattener, 1))) - return; -- if (vsir_instruction_init_label(ins, &flattener->location, label_id, &flattener->parser->program)) -+ if (vsir_instruction_init_label(ins, &flattener->location, label_id, flattener->program)) - ++flattener->instruction_count; - else -- flattener->allocation_failed = true; -+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); - } - - /* For conditional branches, this returns the false target branch parameter. */ -@@ -1947,7 +1969,7 @@ static struct cf_flattener_info *cf_flattener_push_control_flow_level(struct cf_ - flattener->control_flow_depth + 1, sizeof(*flattener->control_flow_info))) - { - ERR("Failed to allocate control flow info structure.\n"); -- flattener->allocation_failed = true; -+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); - return NULL; - } - -@@ -2014,12 +2036,12 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla - flattener->block_names[block_id] = buffer.buffer; - } - --static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener) -+static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, -+ struct vkd3d_shader_message_context *message_context) - { - bool main_block_open, is_hull_shader, after_declarations_section; -- struct vkd3d_shader_parser *parser = flattener->parser; - struct vkd3d_shader_instruction_array *instructions; -- struct vsir_program *program = &parser->program; -+ struct vsir_program 
*program = flattener->program; - struct vkd3d_shader_instruction *dst_ins; - size_t i; - -@@ -2041,12 +2063,19 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - flattener->location = instruction->location; - - /* Declarations should occur before the first code block, which in hull shaders is marked by the first -- * phase instruction, and in all other shader types begins with the first label instruction. */ -- if (!after_declarations_section && !vsir_instruction_is_dcl(instruction) -- && instruction->handler_idx != VKD3DSIH_NOP) -+ * phase instruction, and in all other shader types begins with the first label instruction. -+ * Declaring an indexable temp with function scope is not considered a declaration, -+ * because it needs to live inside a function. */ -+ if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) - { -- after_declarations_section = true; -- cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); -+ bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP -+ && instruction->declaration.indexable_temp.has_function_scope; -+ -+ if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) -+ { -+ after_declarations_section = true; -+ cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); -+ } - } - - cf_info = flattener->control_flow_depth -@@ -2064,7 +2093,8 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - break; - - case VKD3DSIH_LABEL: -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ vkd3d_shader_error(message_context, &instruction->location, -+ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: Label instruction."); - return VKD3D_ERROR_NOT_IMPLEMENTED; - -@@ -2229,8 +2259,10 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, 
X)) - { - WARN("Unexpected src swizzle %#x.\n", src->swizzle); -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, -+ vkd3d_shader_error(message_context, &instruction->location, -+ VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, - "The swizzle for a switch case value is not scalar X."); -+ cf_flattener_set_error(flattener, VKD3D_ERROR_INVALID_SHADER); - } - value = *src->reg.u.immconst_u32; - -@@ -2358,21 +2390,18 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - ++flattener->instruction_count; - } - -- return flattener->allocation_failed ? VKD3D_ERROR_OUT_OF_MEMORY : VKD3D_OK; -+ return flattener->status; - } - --static enum vkd3d_result flatten_control_flow_constructs(struct vkd3d_shader_parser *parser) -+static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) - { -- struct vsir_program *program = &parser->program; -- struct cf_flattener flattener = {0}; -+ struct cf_flattener flattener = {.program = program}; - enum vkd3d_result result; - -- flattener.parser = parser; -- result = cf_flattener_iterate_instruction_array(&flattener); -- -- if (result >= 0) -+ if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) - { -- vkd3d_free(parser->program.instructions.elements); -+ vkd3d_free(program->instructions.elements); - program->instructions.elements = flattener.instructions; - program->instructions.capacity = flattener.instruction_capacity; - program->instructions.count = flattener.instruction_count; -@@ -2656,33 +2685,36 @@ fail: - return VKD3D_ERROR_OUT_OF_MEMORY; - } - --static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src); -+static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, -+ struct vkd3d_shader_src_param *src); - - /* This is idempotent: it can be safely applied 
more than once on the - * same register. */ --static void materialize_ssas_to_temps_process_reg(struct vkd3d_shader_parser *parser, struct vkd3d_shader_register *reg) -+static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg) - { - unsigned int i; - - if (reg->type == VKD3DSPR_SSA) - { - reg->type = VKD3DSPR_TEMP; -- reg->idx[0].offset += parser->program.temp_count; -+ reg->idx[0].offset += program->temp_count; - } - - for (i = 0; i < reg->idx_count; ++i) - if (reg->idx[i].rel_addr) -- materialize_ssas_to_temps_process_src_param(parser, reg->idx[i].rel_addr); -+ materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr); - } - --static void materialize_ssas_to_temps_process_dst_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_dst_param *dst) -+static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program, -+ struct vkd3d_shader_dst_param *dst) - { -- materialize_ssas_to_temps_process_reg(parser, &dst->reg); -+ materialize_ssas_to_temps_process_reg(program, &dst->reg); - } - --static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src) -+static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, -+ struct vkd3d_shader_src_param *src) - { -- materialize_ssas_to_temps_process_reg(parser, &src->reg); -+ materialize_ssas_to_temps_process_reg(program, &src->reg); - } - - static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins, -@@ -2701,7 +2733,7 @@ static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_so - vkd3d_unreachable(); - } - --static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser *parser, -+static bool materialize_ssas_to_temps_synthesize_mov(struct vsir_program *program, - struct vkd3d_shader_instruction *instruction, const struct 
vkd3d_shader_location *loc, - const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond, - const struct vkd3d_shader_src_param *source, bool invert) -@@ -2709,7 +2741,7 @@ static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser - struct vkd3d_shader_src_param *src; - struct vkd3d_shader_dst_param *dst; - -- if (!vsir_instruction_init_with_params(&parser->program, instruction, loc, -+ if (!vsir_instruction_init_with_params(program, instruction, loc, - cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 3 : 1)) - return false; - -@@ -2717,7 +2749,7 @@ static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser - src = instruction->src; - - dst[0] = *dest; -- materialize_ssas_to_temps_process_dst_param(parser, &dst[0]); -+ materialize_ssas_to_temps_process_dst_param(program, &dst[0]); - - assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0); - assert(dst[0].modifiers == 0); -@@ -2729,19 +2761,19 @@ static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser - src[1 + invert] = *source; - memset(&src[2 - invert], 0, sizeof(src[2 - invert])); - src[2 - invert].reg = dst[0].reg; -- materialize_ssas_to_temps_process_src_param(parser, &src[1]); -- materialize_ssas_to_temps_process_src_param(parser, &src[2]); -+ materialize_ssas_to_temps_process_src_param(program, &src[1]); -+ materialize_ssas_to_temps_process_src_param(program, &src[2]); - } - else - { - src[0] = *source; -- materialize_ssas_to_temps_process_src_param(parser, &src[0]); -+ materialize_ssas_to_temps_process_src_param(program, &src[0]); - } - - return true; - } - --static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *parser) -+static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) - { - struct vkd3d_shader_instruction *instructions = NULL; - struct materialize_ssas_to_temps_block_data -@@ -2752,18 +2784,18 @@ static enum vkd3d_result 
materialize_ssas_to_temps(struct vkd3d_shader_parser *p - size_t ins_capacity = 0, ins_count = 0, i; - unsigned int current_label = 0; - -- if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) -+ if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) - goto fail; - -- if (!(block_index = vkd3d_calloc(parser->program.block_count, sizeof(*block_index)))) -+ if (!(block_index = vkd3d_calloc(program->block_count, sizeof(*block_index)))) - { - ERR("Failed to allocate block index.\n"); - goto fail; - } - -- for (i = 0; i < parser->program.instructions.count; ++i) -+ for (i = 0; i < program->instructions.count; ++i) - { -- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - - switch (ins->handler_idx) - { -@@ -2785,16 +2817,16 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p - } - } - -- for (i = 0; i < parser->program.instructions.count; ++i) -+ for (i = 0; i < program->instructions.count; ++i) - { -- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - size_t j; - - for (j = 0; j < ins->dst_count; ++j) -- materialize_ssas_to_temps_process_dst_param(parser, &ins->dst[j]); -+ materialize_ssas_to_temps_process_dst_param(program, &ins->dst[j]); - - for (j = 0; j < ins->src_count; ++j) -- materialize_ssas_to_temps_process_src_param(parser, &ins->src[j]); -+ materialize_ssas_to_temps_process_src_param(program, &ins->src[j]); - - switch (ins->handler_idx) - { -@@ -2815,9 +2847,10 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p - { - const struct vkd3d_shader_src_param *source; - -- source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); -- if 
(!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, -- &parser->program.instructions.elements[j].dst[0], NULL, source, false)) -+ source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], -+ current_label); -+ if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], -+ &ins->location, &program->instructions.elements[j].dst[0], NULL, source, false)) - goto fail; - - ++ins_count; -@@ -2837,9 +2870,10 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p - { - const struct vkd3d_shader_src_param *source; - -- source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); -- if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, -- &parser->program.instructions.elements[j].dst[0], cond, source, false)) -+ source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], -+ current_label); -+ if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], -+ &ins->location, &program->instructions.elements[j].dst[0], cond, source, false)) - goto fail; - - ++ins_count; -@@ -2849,9 +2883,10 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p - { - const struct vkd3d_shader_src_param *source; - -- source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); -- if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, -- &parser->program.instructions.elements[j].dst[0], cond, source, true)) -+ source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], -+ current_label); -+ if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], -+ &ins->location, &program->instructions.elements[j].dst[0], cond, source, true)) - goto fail; - - ++ins_count; -@@ -2873,13 +2908,13 @@ 
static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p - instructions[ins_count++] = *ins; - } - -- vkd3d_free(parser->program.instructions.elements); -+ vkd3d_free(program->instructions.elements); - vkd3d_free(block_index); -- parser->program.instructions.elements = instructions; -- parser->program.instructions.capacity = ins_capacity; -- parser->program.instructions.count = ins_count; -- parser->program.temp_count += parser->program.ssa_count; -- parser->program.ssa_count = 0; -+ program->instructions.elements = instructions; -+ program->instructions.capacity = ins_capacity; -+ program->instructions.count = ins_count; -+ program->temp_count += program->ssa_count; -+ program->ssa_count = 0; - - return VKD3D_OK; - -@@ -2890,125 +2925,6 @@ fail: - return VKD3D_ERROR_OUT_OF_MEMORY; - } - --static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *parser) --{ -- const unsigned int block_temp_idx = parser->program.temp_count; -- struct vkd3d_shader_instruction *instructions = NULL; -- const struct vkd3d_shader_location no_loc = {0}; -- size_t ins_capacity = 0, ins_count = 0, i; -- bool first_label_found = false; -- -- if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) -- goto fail; -- -- for (i = 0; i < parser->program.instructions.count; ++i) -- { -- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; -- -- switch (ins->handler_idx) -- { -- case VKD3DSIH_PHI: -- case VKD3DSIH_SWITCH_MONOLITHIC: -- vkd3d_unreachable(); -- -- case VKD3DSIH_LABEL: -- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 4)) -- goto fail; -- -- if (!first_label_found) -- { -- first_label_found = true; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) -- goto fail; -- dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); -- 
src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); -- ins_count++; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_LOOP, 0, 0)) -- goto fail; -- ins_count++; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_SWITCH, 0, 1)) -- goto fail; -- src_param_init_temp_uint(&instructions[ins_count].src[0], block_temp_idx); -- ins_count++; -- } -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_CASE, 0, 1)) -- goto fail; -- src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); -- ins_count++; -- break; -- -- case VKD3DSIH_BRANCH: -- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 2)) -- goto fail; -- -- if (vsir_register_is_label(&ins->src[0].reg)) -- { -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) -- goto fail; -- dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); -- src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); -- ins_count++; -- } -- else -- { -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOVC, 1, 3)) -- goto fail; -- dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); -- instructions[ins_count].src[0] = ins->src[0]; -- src_param_init_const_uint(&instructions[ins_count].src[1], label_from_src_param(&ins->src[1])); -- src_param_init_const_uint(&instructions[ins_count].src[2], label_from_src_param(&ins->src[2])); -- ins_count++; -- } -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_BREAK, 0, 0)) -- goto fail; -- ins_count++; -- break; -- -- case VKD3DSIH_RET: -- default: -- if (!reserve_instructions(&instructions, 
&ins_capacity, ins_count + 1)) -- goto fail; -- -- instructions[ins_count++] = *ins; -- break; -- } -- } -- -- assert(first_label_found); -- -- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3)) -- goto fail; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDSWITCH, 0, 0)) -- goto fail; -- ins_count++; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDLOOP, 0, 0)) -- goto fail; -- ins_count++; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_RET, 0, 0)) -- goto fail; -- ins_count++; -- -- vkd3d_free(parser->program.instructions.elements); -- parser->program.instructions.elements = instructions; -- parser->program.instructions.capacity = ins_capacity; -- parser->program.instructions.count = ins_count; -- parser->program.temp_count += 1; -- -- return VKD3D_OK; -- --fail: -- vkd3d_free(instructions); -- return VKD3D_ERROR_OUT_OF_MEMORY; --} -- - struct vsir_block_list - { - struct vsir_block **blocks; -@@ -3025,14 +2941,8 @@ static void vsir_block_list_cleanup(struct vsir_block_list *list) - vkd3d_free(list->blocks); - } - --static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) -+static enum vkd3d_result vsir_block_list_add_checked(struct vsir_block_list *list, struct vsir_block *block) - { -- size_t i; -- -- for (i = 0; i < list->count; ++i) -- if (block == list->blocks[i]) -- return VKD3D_OK; -- - if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) - { - ERR("Cannot extend block list.\n"); -@@ -3044,9 +2954,27 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc - return VKD3D_OK; - } - -+static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) -+{ -+ size_t i; -+ -+ for (i = 0; i < list->count; 
++i) -+ if (block == list->blocks[i]) -+ return VKD3D_FALSE; -+ -+ return vsir_block_list_add_checked(list, block); -+} -+ -+/* It is guaranteed that the relative order is kept. */ -+static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t idx) -+{ -+ --list->count; -+ memmove(&list->blocks[idx], &list->blocks[idx + 1], (list->count - idx) * sizeof(*list->blocks)); -+} -+ - struct vsir_block - { -- unsigned int label; -+ unsigned int label, order_pos; - /* `begin' points to the instruction immediately following the - * LABEL that introduces the block. `end' points to the terminator - * instruction (either BRANCH or RET). They can coincide, meaning -@@ -3089,12 +3017,163 @@ static void vsir_block_cleanup(struct vsir_block *block) - vkd3d_free(block->dominates); - } - -+static int block_compare(const void *ptr1, const void *ptr2) -+{ -+ const struct vsir_block *block1 = *(const struct vsir_block **)ptr1; -+ const struct vsir_block *block2 = *(const struct vsir_block **)ptr2; -+ -+ return vkd3d_u32_compare(block1->label, block2->label); -+} -+ -+static void vsir_block_list_sort(struct vsir_block_list *list) -+{ -+ qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare); -+} -+ -+static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_block *block) -+{ -+ return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare); -+} -+ -+struct vsir_cfg_structure_list -+{ -+ struct vsir_cfg_structure *structures; -+ size_t count, capacity; -+ unsigned int end; -+}; -+ -+struct vsir_cfg_structure -+{ -+ enum vsir_cfg_structure_type -+ { -+ /* Execute a block of the original VSIR program. */ -+ STRUCTURE_TYPE_BLOCK, -+ /* Execute a loop, which is identified by an index. */ -+ STRUCTURE_TYPE_LOOP, -+ /* Execute a `return' or a (possibly) multilevel `break' or -+ * `continue', targeting a loop by its index. 
If `condition' -+ * is non-NULL, then the jump is conditional (this is -+ * currently not allowed for `return'). */ -+ STRUCTURE_TYPE_JUMP, -+ } type; -+ union -+ { -+ struct vsir_block *block; -+ struct -+ { -+ struct vsir_cfg_structure_list body; -+ unsigned idx; -+ } loop; -+ struct -+ { -+ enum vsir_cfg_jump_type -+ { -+ /* NONE is available as an intermediate value, but it -+ * is not allowed in valid structured programs. */ -+ JUMP_NONE, -+ JUMP_BREAK, -+ JUMP_CONTINUE, -+ JUMP_RET, -+ } type; -+ unsigned int target; -+ struct vkd3d_shader_src_param *condition; -+ bool invert_condition; -+ } jump; -+ } u; -+}; -+ -+static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type); -+static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure); -+ -+static void vsir_cfg_structure_list_cleanup(struct vsir_cfg_structure_list *list) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < list->count; ++i) -+ vsir_cfg_structure_cleanup(&list->structures[i]); -+ vkd3d_free(list->structures); -+} -+ -+static struct vsir_cfg_structure *vsir_cfg_structure_list_append(struct vsir_cfg_structure_list *list, -+ enum vsir_cfg_structure_type type) -+{ -+ struct vsir_cfg_structure *ret; -+ -+ if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + 1, -+ sizeof(*list->structures))) -+ return NULL; -+ -+ ret = &list->structures[list->count++]; -+ -+ vsir_cfg_structure_init(ret, type); -+ -+ return ret; -+} -+ -+static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type) -+{ -+ memset(structure, 0, sizeof(*structure)); -+ structure->type = type; -+} -+ -+static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) -+{ -+ if (structure->type == STRUCTURE_TYPE_LOOP) -+ vsir_cfg_structure_list_cleanup(&structure->u.loop.body); -+} -+ - struct vsir_cfg - { -+ struct vkd3d_shader_message_context *message_context; - struct vsir_program *program; 
- struct vsir_block *blocks; - struct vsir_block *entry; - size_t block_count; -+ struct vkd3d_string_buffer debug_buffer; -+ -+ struct vsir_block_list *loops; -+ size_t loops_count, loops_capacity; -+ size_t *loops_by_header; -+ -+ struct vsir_block_list order; -+ struct cfg_loop_interval -+ { -+ /* `begin' is the position of the first block of the loop in -+ * the topological sort; `end' is the position of the first -+ * block after the loop. In other words, `begin' is where a -+ * `continue' instruction would jump and `end' is where a -+ * `break' instruction would jump. */ -+ unsigned int begin, end; -+ /* Each loop interval can be natural or synthetic. Natural -+ * intervals are added to represent loops given by CFG back -+ * edges. Synthetic intervals do not correspond to loops in -+ * the input CFG, but are added to leverage their `break' -+ * instruction in order to execute forward edges. -+ * -+ * For a synthetic loop interval it's not really important -+ * which one is the `begin' block, since we don't need to -+ * execute `continue' for them. So we have some leeway for -+ * moving it provided that these conditions are met: 1. the -+ * interval must contain all `break' instructions that target -+ * it, which in practice means that `begin' can be moved -+ * backward and not forward; 2. intervals must remain properly -+ * nested (for each pair of intervals, either one contains the -+ * other or they are disjoint). -+ * -+ * Subject to these conditions, we try to reuse the same loop -+ * as much as possible (if many forward edges target the same -+ * block), but we still try to keep `begin' as forward as -+ * possible, to keep the loop scope as small as possible. 
*/ -+ bool synthetic; -+ } *loop_intervals; -+ size_t loop_interval_count, loop_interval_capacity; -+ -+ struct vsir_cfg_structure_list structured_program; -+ -+ struct vkd3d_shader_instruction *instructions; -+ size_t ins_capacity, ins_count; -+ unsigned int jump_target_temp_idx; -+ unsigned int temp_count; - }; - - static void vsir_cfg_cleanup(struct vsir_cfg *cfg) -@@ -3104,7 +3183,43 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) - for (i = 0; i < cfg->block_count; ++i) - vsir_block_cleanup(&cfg->blocks[i]); - -+ for (i = 0; i < cfg->loops_count; ++i) -+ vsir_block_list_cleanup(&cfg->loops[i]); -+ -+ vsir_block_list_cleanup(&cfg->order); -+ -+ vsir_cfg_structure_list_cleanup(&cfg->structured_program); -+ - vkd3d_free(cfg->blocks); -+ vkd3d_free(cfg->loops); -+ vkd3d_free(cfg->loops_by_header); -+ vkd3d_free(cfg->loop_intervals); -+ -+ if (TRACE_ON()) -+ vkd3d_string_buffer_cleanup(&cfg->debug_buffer); -+} -+ -+static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, -+ unsigned int end, bool synthetic) -+{ -+ struct cfg_loop_interval *interval; -+ -+ if (!vkd3d_array_reserve((void **)&cfg->loop_intervals, &cfg->loop_interval_capacity, -+ cfg->loop_interval_count + 1, sizeof(*cfg->loop_intervals))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ interval = &cfg->loop_intervals[cfg->loop_interval_count++]; -+ -+ interval->begin = begin; -+ interval->end = end; -+ interval->synthetic = synthetic; -+ -+ return VKD3D_OK; -+} -+ -+static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2) -+{ -+ return bitmap_is_set(b1->dominates, b2->label - 1); - } - - static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_block *block, -@@ -3162,19 +3277,96 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) - TRACE("}\n"); - } - --static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program) -+static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct 
vsir_cfg_structure_list *list); -+ -+static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure *structure) -+{ -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_BLOCK: -+ TRACE("%sblock %u\n", cfg->debug_buffer.buffer, structure->u.block->label); -+ break; -+ -+ case STRUCTURE_TYPE_LOOP: -+ TRACE("%s%u : loop {\n", cfg->debug_buffer.buffer, structure->u.loop.idx); -+ -+ vsir_cfg_structure_list_dump(cfg, &structure->u.loop.body); -+ -+ TRACE("%s} # %u\n", cfg->debug_buffer.buffer, structure->u.loop.idx); -+ break; -+ -+ case STRUCTURE_TYPE_JUMP: -+ { -+ const char *type_str; -+ -+ switch (structure->u.jump.type) -+ { -+ case JUMP_RET: -+ TRACE("%sret\n", cfg->debug_buffer.buffer); -+ return; -+ -+ case JUMP_BREAK: -+ type_str = "break"; -+ break; -+ -+ case JUMP_CONTINUE: -+ type_str = "continue"; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ TRACE("%s%s%s %u\n", cfg->debug_buffer.buffer, type_str, -+ structure->u.jump.condition ? "c" : "", structure->u.jump.target); -+ break; -+ } -+ -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) -+{ -+ unsigned int i; -+ -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, " "); -+ -+ for (i = 0; i < list->count; ++i) -+ vsir_cfg_structure_dump(cfg, &list->structures[i]); -+ -+ vkd3d_string_buffer_truncate(&cfg->debug_buffer, cfg->debug_buffer.content_size - 2); -+} -+ -+static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < cfg->structured_program.count; ++i) -+ vsir_cfg_structure_dump(cfg, &cfg->structured_program.structures[i]); -+} -+ -+static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) - { - struct vsir_block *current_block = NULL; - enum vkd3d_result ret; - size_t i; - - memset(cfg, 0, sizeof(*cfg)); -+ cfg->message_context = 
message_context; - cfg->program = program; - cfg->block_count = program->block_count; - -+ vsir_block_list_init(&cfg->order); -+ - if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) - return VKD3D_ERROR_OUT_OF_MEMORY; - -+ if (TRACE_ON()) -+ vkd3d_string_buffer_init(&cfg->debug_buffer); -+ - for (i = 0; i < program->instructions.count; ++i) - { - struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; -@@ -3285,12 +3477,8 @@ static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, stru - - static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) - { -- struct vkd3d_string_buffer buf; - size_t i, j; - -- if (TRACE_ON()) -- vkd3d_string_buffer_init(&buf); -- - for (i = 0; i < cfg->block_count; ++i) - { - struct vsir_block *block = &cfg->blocks[i]; -@@ -3302,7 +3490,7 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) - - if (TRACE_ON()) - { -- vkd3d_string_buffer_printf(&buf, "Block %u dominates:", block->label); -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block %u dominates:", block->label); - for (j = 0; j < cfg->block_count; j++) - { - struct vsir_block *block2 = &cfg->blocks[j]; -@@ -3310,46 +3498,952 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) - if (block2->label == 0) - continue; - -- if (bitmap_is_set(block->dominates, j)) -- vkd3d_string_buffer_printf(&buf, " %u", block2->label); -+ if (vsir_block_dominates(block, block2)) -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); - } -- TRACE("%s\n", buf.buffer); -- vkd3d_string_buffer_clear(&buf); -+ TRACE("%s\n", cfg->debug_buffer.buffer); -+ vkd3d_string_buffer_clear(&cfg->debug_buffer); - } - } -- -- if (TRACE_ON()) -- vkd3d_string_buffer_cleanup(&buf); - } - --enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_compile_info *compile_info) -+/* A back edge is an edge X -> Y for which block Y dominates block -+ * X. 
All the other edges are forward edges, and it is required that -+ * the input CFG is reducible, i.e., it is acyclic once you strip away -+ * the back edges. -+ * -+ * Each back edge X -> Y defines a loop: block Y is the header block, -+ * block X is the back edge block, and the loop consists of all the -+ * blocks which are dominated by the header block and have a path to -+ * the back edge block that doesn't pass through the header block -+ * (including the header block itself). It can be proved that all the -+ * blocks in such a path (connecting a loop block to the back edge -+ * block without passing through the header block) belong to the same -+ * loop. -+ * -+ * If the input CFG is reducible its loops are properly nested (i.e., -+ * any two loops are either disjoint or one is contained in the -+ * other), provided that each block has at most one incoming back -+ * edge. If this condition does not hold, a synthetic block can be -+ * introduced as the only back edge block for the given header block, -+ * with all the previous back edges now being forward edges to the -+ * synthetic block. This is not currently implemented (but it is -+ * rarely found in practice anyway). 
*/ -+static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct vsir_block *block, -+ struct vsir_block *header) - { -- struct vkd3d_shader_instruction_array *instructions = &parser->program.instructions; -- enum vkd3d_result result = VKD3D_OK; -+ enum vkd3d_result ret; -+ size_t i; - -- remove_dcl_temps(&parser->program); -+ if ((ret = vsir_block_list_add(loop, block)) < 0) -+ return ret; - -- if ((result = instruction_array_lower_texkills(parser)) < 0) -- return result; -+ if (ret == VKD3D_FALSE || block == header) -+ return VKD3D_OK; - -- if (parser->shader_desc.is_dxil) -+ for (i = 0; i < block->predecessors.count; ++i) - { -- struct vsir_cfg cfg; -+ if ((ret = vsir_cfg_scan_loop(loop, block->predecessors.blocks[i], header)) < 0) -+ return ret; -+ } - -- if ((result = lower_switch_to_if_ladder(&parser->program)) < 0) -- return result; -+ return VKD3D_OK; -+} - -- if ((result = materialize_ssas_to_temps(parser)) < 0) -- return result; -+static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) -+{ -+ size_t i, j, k; - -- if ((result = vsir_cfg_init(&cfg, &parser->program)) < 0) -- return result; -+ if (!(cfg->loops_by_header = vkd3d_calloc(cfg->block_count, sizeof(*cfg->loops_by_header)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ memset(cfg->loops_by_header, 0xff, cfg->block_count * sizeof(*cfg->loops_by_header)); - -- vsir_cfg_compute_dominators(&cfg); -+ for (i = 0; i < cfg->block_count; ++i) -+ { -+ struct vsir_block *block = &cfg->blocks[i]; - -- if ((result = simple_structurizer_run(parser)) < 0) -- { -+ if (block->label == 0) -+ continue; -+ -+ for (j = 0; j < block->successors.count; ++j) -+ { -+ struct vsir_block *header = block->successors.blocks[j]; -+ struct vsir_block_list *loop; -+ enum vkd3d_result ret; -+ -+ /* Is this a back edge? 
*/ -+ if (!vsir_block_dominates(header, block)) -+ continue; -+ -+ if (!vkd3d_array_reserve((void **)&cfg->loops, &cfg->loops_capacity, cfg->loops_count + 1, sizeof(*cfg->loops))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ loop = &cfg->loops[cfg->loops_count]; -+ vsir_block_list_init(loop); -+ -+ if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) -+ return ret; -+ -+ vsir_block_list_sort(loop); -+ -+ if (TRACE_ON()) -+ { -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label); -+ -+ for (k = 0; k < loop->count; ++k) -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", loop->blocks[k]->label); -+ -+ TRACE("%s\n", cfg->debug_buffer.buffer); -+ vkd3d_string_buffer_clear(&cfg->debug_buffer); -+ } -+ -+ if (cfg->loops_by_header[header->label - 1] != SIZE_MAX) -+ { -+ FIXME("Block %u is header to more than one loop, this is not implemented.\n", header->label); -+ vkd3d_shader_error(cfg->message_context, &header->begin->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Block %u is header to more than one loop, this is not implemented.", header->label); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ -+ cfg->loops_by_header[header->label - 1] = cfg->loops_count; -+ -+ ++cfg->loops_count; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+struct vsir_cfg_node_sorter -+{ -+ struct vsir_cfg *cfg; -+ struct vsir_cfg_node_sorter_stack_item -+ { -+ struct vsir_block_list *loop; -+ unsigned int seen_count; -+ unsigned int begin; -+ } *stack; -+ size_t stack_count, stack_capacity; -+ struct vsir_block_list available_blocks; -+}; -+ -+static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cfg_node_sorter *sorter, struct vsir_block *block) -+{ -+ struct vsir_block_list *loop = NULL; -+ struct vsir_cfg_node_sorter_stack_item *item; -+ enum vkd3d_result ret; -+ -+ if (sorter->cfg->loops_by_header[block->label - 1] != SIZE_MAX) -+ loop = &sorter->cfg->loops[sorter->cfg->loops_by_header[block->label - 
1]]; -+ -+ if ((ret = vsir_block_list_add_checked(&sorter->available_blocks, block)) < 0) -+ return ret; -+ -+ if (!loop) -+ return VKD3D_OK; -+ -+ if (!vkd3d_array_reserve((void **)&sorter->stack, &sorter->stack_capacity, sorter->stack_count + 1, sizeof(*sorter->stack))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ item = &sorter->stack[sorter->stack_count++]; -+ item->loop = loop; -+ item->seen_count = 0; -+ item->begin = sorter->cfg->order.count; -+ -+ return VKD3D_OK; -+} -+ -+/* Topologically sort the blocks according to the forward edges. By -+ * definition if the input CFG is reducible then its forward edges -+ * form a DAG, so a topological sorting exists. In order to compute it -+ * we keep an array with the incoming degree for each block and an -+ * available list of all the blocks whose incoming degree has reached -+ * zero. At each step we pick a block from the available list and -+ * strip it away from the graph, updating the incoming degrees and -+ * available list. -+ * -+ * In principle at each step we can pick whatever node we want from -+ * the available list, and will get a topological sort -+ * anyway. However, we use these two criteria to give to the computed -+ * order additional properties: -+ * -+ * 1. we keep track of which loops we're into, and pick blocks -+ * belonging to the current innermost loop, so that loops are kept -+ * contiguous in the order; this can always be done when the input -+ * CFG is reducible; -+ * -+ * 2. subject to the requirement above, we always pick the most -+ * recently added block to the available list, because this tends -+ * to keep related blocks and require fewer control flow -+ * primitives. 
-+ */ -+static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) -+{ -+ struct vsir_cfg_node_sorter sorter = { .cfg = cfg }; -+ unsigned int *in_degrees = NULL; -+ enum vkd3d_result ret; -+ size_t i; -+ -+ if (!(in_degrees = vkd3d_calloc(cfg->block_count, sizeof(*in_degrees)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ for (i = 0; i < cfg->block_count; ++i) -+ { -+ struct vsir_block *block = &cfg->blocks[i]; -+ -+ if (block->label == 0) -+ { -+ in_degrees[i] = UINT_MAX; -+ continue; -+ } -+ -+ in_degrees[i] = block->predecessors.count; -+ -+ /* Do not count back edges. */ -+ if (cfg->loops_by_header[i] != SIZE_MAX) -+ { -+ assert(in_degrees[i] > 0); -+ in_degrees[i] -= 1; -+ } -+ -+ if (in_degrees[i] == 0 && block != cfg->entry) -+ { -+ WARN("Unexpected entry point %u.\n", block->label); -+ vkd3d_shader_error(cfg->message_context, &block->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Block %u is unreachable from the entry point.", block->label); -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; -+ } -+ } -+ -+ if (in_degrees[cfg->entry->label - 1] != 0) -+ { -+ WARN("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]); -+ vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "The entry point block has %u incoming forward edges.", in_degrees[cfg->entry->label - 1]); -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; -+ } -+ -+ vsir_block_list_init(&sorter.available_blocks); -+ -+ if ((ret = vsir_cfg_node_sorter_make_node_available(&sorter, cfg->entry)) < 0) -+ goto fail; -+ -+ while (sorter.available_blocks.count != 0) -+ { -+ struct vsir_cfg_node_sorter_stack_item *inner_stack_item = NULL; -+ struct vsir_block *block; -+ size_t new_seen_count; -+ -+ if (sorter.stack_count != 0) -+ inner_stack_item = &sorter.stack[sorter.stack_count - 1]; -+ -+ for (i = sorter.available_blocks.count - 1; ; --i) -+ { -+ if (i == SIZE_MAX) -+ { -+ 
ERR("Couldn't find any viable next block, is the input CFG reducible?\n"); -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; -+ } -+ -+ block = sorter.available_blocks.blocks[i]; -+ -+ if (!inner_stack_item || vsir_block_list_search(inner_stack_item->loop, block)) -+ break; -+ } -+ -+ vsir_block_list_remove_index(&sorter.available_blocks, i); -+ block->order_pos = cfg->order.count; -+ if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) -+ goto fail; -+ -+ /* Close loops: since each loop is a strict subset of any -+ * outer loop, we just need to track how many blocks we've -+ * seen; when I close a loop I mark the same number of seen -+ * blocks for the next outer loop. */ -+ new_seen_count = 1; -+ while (sorter.stack_count != 0) -+ { -+ inner_stack_item = &sorter.stack[sorter.stack_count - 1]; -+ -+ inner_stack_item->seen_count += new_seen_count; -+ -+ assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); -+ if (inner_stack_item->seen_count != inner_stack_item->loop->count) -+ break; -+ -+ if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->begin, -+ cfg->order.count, false)) < 0) -+ goto fail; -+ -+ new_seen_count = inner_stack_item->loop->count; -+ --sorter.stack_count; -+ } -+ -+ /* Remove (forward) edges and make new nodes available. */ -+ for (i = 0; i < block->successors.count; ++i) -+ { -+ struct vsir_block *successor = block->successors.blocks[i]; -+ -+ if (vsir_block_dominates(successor, block)) -+ continue; -+ -+ assert(in_degrees[successor->label - 1] > 0); -+ --in_degrees[successor->label - 1]; -+ -+ if (in_degrees[successor->label - 1] == 0) -+ { -+ if ((ret = vsir_cfg_node_sorter_make_node_available(&sorter, successor)) < 0) -+ goto fail; -+ } -+ } -+ } -+ -+ if (cfg->order.count != cfg->block_count) -+ { -+ /* There is a cycle of forward edges. 
*/ -+ WARN("The control flow graph is not reducible.\n"); -+ vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "The control flow graph is not reducible."); -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; -+ } -+ -+ assert(sorter.stack_count == 0); -+ -+ vkd3d_free(in_degrees); -+ vkd3d_free(sorter.stack); -+ vsir_block_list_cleanup(&sorter.available_blocks); -+ -+ if (TRACE_ON()) -+ { -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:"); -+ -+ for (i = 0; i < cfg->order.count; ++i) -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label); -+ -+ TRACE("%s\n", cfg->debug_buffer.buffer); -+ vkd3d_string_buffer_clear(&cfg->debug_buffer); -+ } -+ -+ return VKD3D_OK; -+ -+fail: -+ vkd3d_free(in_degrees); -+ vkd3d_free(sorter.stack); -+ vsir_block_list_cleanup(&sorter.available_blocks); -+ -+ return ret; -+} -+ -+/* Sort loop intervals first by ascending begin time and then by -+ * descending end time, so that inner intervals appear after outer -+ * ones and disjoint intervals appear in their proper order. 
*/ -+static int compare_loop_intervals(const void *ptr1, const void *ptr2) -+{ -+ const struct cfg_loop_interval *interval1 = ptr1; -+ const struct cfg_loop_interval *interval2 = ptr2; -+ -+ if (interval1->begin != interval2->begin) -+ return vkd3d_u32_compare(interval1->begin, interval2->begin); -+ -+ return -vkd3d_u32_compare(interval1->end, interval2->end); -+} -+ -+static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) -+{ -+ enum vkd3d_result ret; -+ size_t i, j, k; -+ -+ for (i = 0; i < cfg->block_count; ++i) -+ { -+ struct vsir_block *block = &cfg->blocks[i]; -+ -+ if (block->label == 0) -+ continue; -+ -+ for (j = 0; j < block->successors.count; ++j) -+ { -+ struct vsir_block *successor = block->successors.blocks[j]; -+ struct cfg_loop_interval *extend = NULL; -+ unsigned int begin; -+ enum -+ { -+ ACTION_DO_NOTHING, -+ ACTION_CREATE_NEW, -+ ACTION_EXTEND, -+ } action = ACTION_CREATE_NEW; -+ -+ /* We've already constructed loop intervals for the back -+ * edges, there's nothing more to do. */ -+ if (vsir_block_dominates(successor, block)) -+ continue; -+ -+ assert(block->order_pos < successor->order_pos); -+ -+ /* Jumping from a block to the following one is always -+ * possible, so nothing to do. */ -+ if (block->order_pos + 1 == successor->order_pos) -+ continue; -+ -+ /* Let's look for a loop interval that already breaks at -+ * `successor' and either contains or can be extended to -+ * contain `block'. */ -+ for (k = 0; k < cfg->loop_interval_count; ++k) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; -+ -+ if (interval->end != successor->order_pos) -+ continue; -+ -+ if (interval->begin <= block->order_pos) -+ { -+ action = ACTION_DO_NOTHING; -+ break; -+ } -+ -+ if (interval->synthetic) -+ { -+ action = ACTION_EXTEND; -+ extend = interval; -+ break; -+ } -+ } -+ -+ if (action == ACTION_DO_NOTHING) -+ continue; -+ -+ /* Ok, we have to decide where the new or replacing -+ * interval has to begin.
These are the rules: 1. it must -+ * begin at or before `block'; 2. intervals must be properly -+ * nested; 3. the new interval should begin as late as -+ * possible, to limit control flow depth and extension. */ -+ begin = block->order_pos; -+ -+ /* Our candidate interval is always [begin, -+ * successor->order_pos), and we move `begin' backward -+ * until the candidate interval contains all the intervals -+ * whose endpoint lies in the candidate interval -+ * itself. */ -+ for (k = 0; k < cfg->loop_interval_count; ++k) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; -+ -+ if (begin < interval->end && interval->end < successor->order_pos) -+ begin = min(begin, interval->begin); -+ } -+ -+ /* Now we have to care about the intervals whose begin -+ * point lies in the candidate interval. We cannot move -+ * the candidate interval endpoint, because it is -+ * important that the loop break target matches -+ * `successor'. So we have to move that interval's begin -+ * point to the begin point of the candidate interval, -+ * i.e. `begin'. But what if the interval we should extend -+ * backward is not synthetic? This cannot happen, -+ * fortunately, because it would mean that there is a jump -+ * entering a loop via a block which is not the loop -+ * header, so the CFG would not be reducible.
*/ -+ for (k = 0; k < cfg->loop_interval_count; ++k) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; -+ -+ if (interval->begin < successor->order_pos && successor->order_pos < interval->end) -+ { -+ if (interval->synthetic) -+ interval->begin = min(begin, interval->begin); -+ assert(begin >= interval->begin); -+ } -+ } -+ -+ if (action == ACTION_EXTEND) -+ extend->begin = begin; -+ else if ((ret = vsir_cfg_add_loop_interval(cfg, begin, successor->order_pos, true)) < 0) -+ return ret; -+ } -+ } -+ -+ qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); -+ -+ if (TRACE_ON()) -+ for (i = 0; i < cfg->loop_interval_count; ++i) -+ TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ? "Synthetic" : "Natural", -+ cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); -+ -+ return VKD3D_OK; -+} -+ -+struct vsir_cfg_edge_action -+{ -+ enum vsir_cfg_jump_type jump_type; -+ unsigned int target; -+ struct vsir_block *successor; -+}; -+ -+static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block *block, -+ struct vsir_block *successor, struct vsir_cfg_edge_action *action) -+{ -+ unsigned int i; -+ -+ action->target = UINT_MAX; -+ action->successor = successor; -+ -+ if (successor->order_pos <= block->order_pos) -+ { -+ /* The successor is before the current block, so we have to -+ * use `continue'. The target loop is the innermost that -+ * contains the current block and has the successor as -+ * `continue' target. 
*/ -+ for (i = 0; i < cfg->loop_interval_count; ++i) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; -+ -+ if (interval->begin == successor->order_pos && block->order_pos < interval->end) -+ action->target = i; -+ -+ if (interval->begin > successor->order_pos) -+ break; -+ } -+ -+ assert(action->target != UINT_MAX); -+ action->jump_type = JUMP_CONTINUE; -+ } -+ else -+ { -+ /* The successor is after the current block, so we have to use -+ * `break', or possibly just jump to the following block. The -+ * target loop is the outermost that contains the current -+ * block and has the successor as `break' target. */ -+ for (i = 0; i < cfg->loop_interval_count; ++i) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; -+ -+ if (interval->begin <= block->order_pos && interval->end == successor->order_pos) -+ { -+ action->target = i; -+ break; -+ } -+ } -+ -+ if (action->target == UINT_MAX) -+ { -+ assert(successor->order_pos == block->order_pos + 1); -+ action->jump_type = JUMP_NONE; -+ } -+ else -+ { -+ action->jump_type = JUMP_BREAK; -+ } -+ } -+} -+ -+static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) -+{ -+ unsigned int i, stack_depth = 1, open_interval_idx = 0; -+ struct vsir_cfg_structure_list **stack = NULL; -+ -+ /* It's enough to allocate up to the maximum interval stacking -+ * depth (plus one for the full program), but this is simpler. */ -+ if (!(stack = vkd3d_calloc(cfg->loop_interval_count + 1, sizeof(*stack)))) -+ goto fail; -+ cfg->structured_program.end = cfg->order.count; -+ stack[0] = &cfg->structured_program; -+ -+ for (i = 0; i < cfg->order.count; ++i) -+ { -+ struct vsir_block *block = cfg->order.blocks[i]; -+ struct vsir_cfg_structure *structure; -+ -+ assert(stack_depth > 0); -+ -+ /* Open loop intervals. 
*/ -+ while (open_interval_idx < cfg->loop_interval_count) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[open_interval_idx]; -+ -+ if (interval->begin != i) -+ break; -+ -+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_LOOP))) -+ goto fail; -+ structure->u.loop.idx = open_interval_idx++; -+ -+ structure->u.loop.body.end = interval->end; -+ stack[stack_depth++] = &structure->u.loop.body; -+ } -+ -+ /* Execute the block. */ -+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_BLOCK))) -+ goto fail; -+ structure->u.block = block; -+ -+ /* Generate between zero and two jump instructions. */ -+ switch (block->end->handler_idx) -+ { -+ case VKD3DSIH_BRANCH: -+ { -+ struct vsir_cfg_edge_action action_true, action_false; -+ bool invert_condition = false; -+ -+ if (vsir_register_is_label(&block->end->src[0].reg)) -+ { -+ unsigned int target = label_from_src_param(&block->end->src[0]); -+ struct vsir_block *successor = &cfg->blocks[target - 1]; -+ -+ vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); -+ action_false = action_true; -+ } -+ else -+ { -+ unsigned int target = label_from_src_param(&block->end->src[1]); -+ struct vsir_block *successor = &cfg->blocks[target - 1]; -+ -+ vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); -+ -+ target = label_from_src_param(&block->end->src[2]); -+ successor = &cfg->blocks[target - 1]; -+ -+ vsir_cfg_compute_edge_action(cfg, block, successor, &action_false); -+ } -+ -+ /* This will happen if the branch is unconditional, -+ * but also if it's conditional with the same target -+ * in both branches, which can happen in some corner -+ * cases, e.g. when converting switch instructions to -+ * selection ladders. 
*/ -+ if (action_true.successor == action_false.successor) -+ { -+ assert(action_true.jump_type == action_false.jump_type); -+ } -+ else -+ { -+ /* At most one branch can just fall through to the -+ * next block, in which case we make sure it's the -+ * false branch. */ -+ if (action_true.jump_type == JUMP_NONE) -+ { -+ struct vsir_cfg_edge_action tmp = action_true; -+ action_true = action_false; -+ action_false = tmp; -+ invert_condition = true; -+ } -+ -+ assert(action_true.jump_type != JUMP_NONE); -+ -+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) -+ goto fail; -+ structure->u.jump.type = action_true.jump_type; -+ structure->u.jump.target = action_true.target; -+ structure->u.jump.condition = &block->end->src[0]; -+ structure->u.jump.invert_condition = invert_condition; -+ } -+ -+ if (action_false.jump_type != JUMP_NONE) -+ { -+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) -+ goto fail; -+ structure->u.jump.type = action_false.jump_type; -+ structure->u.jump.target = action_false.target; -+ } -+ break; -+ } -+ -+ case VKD3DSIH_RET: -+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) -+ goto fail; -+ structure->u.jump.type = JUMP_RET; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ /* Close loop intervals. 
*/ -+ while (stack_depth > 0) -+ { -+ if (stack[stack_depth - 1]->end != i + 1) -+ break; -+ -+ --stack_depth; -+ } -+ } -+ -+ assert(stack_depth == 0); -+ assert(open_interval_idx == cfg->loop_interval_count); -+ -+ if (TRACE_ON()) -+ vsir_cfg_dump_structured_program(cfg); -+ -+ vkd3d_free(stack); -+ -+ return VKD3D_OK; -+ -+fail: -+ vkd3d_free(stack); -+ -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+} -+ -+static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list, unsigned int loop_idx) -+{ -+ const struct vkd3d_shader_location no_loc = {0}; -+ enum vkd3d_result ret; -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i]; -+ -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_BLOCK: -+ { -+ struct vsir_block *block = structure->u.block; -+ -+ if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + (block->end - block->begin))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ memcpy(&cfg->instructions[cfg->ins_count], block->begin, (char *)block->end - (char *)block->begin); -+ -+ cfg->ins_count += block->end - block->begin; -+ break; -+ } -+ -+ case STRUCTURE_TYPE_LOOP: -+ { -+ if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_LOOP); -+ -+ if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.loop.body, structure->u.loop.idx)) < 0) -+ return ret; -+ -+ if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 5)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); -+ -+ /* Add a trampoline to implement multilevel jumping depending on the stored -+ * jump_target value. 
*/ -+ if (loop_idx != UINT_MAX) -+ { -+ /* If the multilevel jump is a `continue' and the target is the loop we're inside -+ * right now, then we can finally do the `continue'. */ -+ const unsigned int outer_continue_target = loop_idx << 1 | 1; -+ /* If the multilevel jump is a `continue' to any other target, or if it is a `break' -+ * and the target is not the loop we just finished emitting, then it means that -+ * we have to reach an outer loop, so we keep breaking. */ -+ const unsigned int inner_break_target = structure->u.loop.idx << 1; -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -+ &no_loc, VKD3DSIH_IEQ, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); -+ src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); -+ src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], outer_continue_target); -+ -+ ++cfg->ins_count; -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -+ &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); -+ -+ ++cfg->ins_count; -+ ++cfg->temp_count; -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -+ &no_loc, VKD3DSIH_IEQ, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); -+ src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); -+ src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], inner_break_target); -+ -+ ++cfg->ins_count; -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -+ &no_loc, VKD3DSIH_BREAKP, 0, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ 
cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; -+ -+ src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); -+ -+ ++cfg->ins_count; -+ ++cfg->temp_count; -+ } -+ -+ break; -+ } -+ -+ case STRUCTURE_TYPE_JUMP: -+ { -+ /* Encode the jump target as the loop index plus a bit to remember whether -+ * we're breaking or continuing. */ -+ unsigned int jump_target = structure->u.jump.target << 1; -+ enum vkd3d_shader_opcode opcode; -+ -+ switch (structure->u.jump.type) -+ { -+ case JUMP_CONTINUE: -+ /* If we're continuing the loop we're directly inside, then we can emit a -+ * `continue'. Otherwise we first have to break all the loops between here -+ * and the loop to continue, recording our intention to continue -+ * in the lowest bit of jump_target. */ -+ if (structure->u.jump.target == loop_idx) -+ { -+ opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; -+ break; -+ } -+ jump_target |= 1; -+ /* fall through */ -+ -+ case JUMP_BREAK: -+ opcode = structure->u.jump.condition ?
VKD3DSIH_BREAKP : VKD3DSIH_BREAK; -+ break; -+ -+ case JUMP_RET: -+ assert(!structure->u.jump.condition); -+ opcode = VKD3DSIH_RET; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) -+ { -+ if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -+ &no_loc, VKD3DSIH_MOV, 1, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ dst_param_init_temp_uint(&cfg->instructions[cfg->ins_count].dst[0], cfg->jump_target_temp_idx); -+ src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[0], jump_target); -+ -+ ++cfg->ins_count; -+ } -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -+ &no_loc, opcode, 0, !!structure->u.jump.condition)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (structure->u.jump.invert_condition) -+ cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; -+ -+ if (structure->u.jump.condition) -+ cfg->instructions[cfg->ins_count].src[0] = *structure->u.jump.condition; -+ -+ ++cfg->ins_count; -+ break; -+ } -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) -+{ -+ enum vkd3d_result ret; -+ size_t i; -+ -+ cfg->jump_target_temp_idx = cfg->program->temp_count; -+ cfg->temp_count = cfg->program->temp_count + 1; -+ -+ if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->program->instructions.count)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ /* Copy declarations until the first block. 
*/ -+ for (i = 0; i < cfg->program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &cfg->program->instructions.elements[i]; -+ -+ if (ins->handler_idx == VKD3DSIH_LABEL) -+ break; -+ -+ cfg->instructions[cfg->ins_count++] = *ins; -+ } -+ -+ if ((ret = vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX)) < 0) -+ goto fail; -+ -+ vkd3d_free(cfg->program->instructions.elements); -+ cfg->program->instructions.elements = cfg->instructions; -+ cfg->program->instructions.capacity = cfg->ins_capacity; -+ cfg->program->instructions.count = cfg->ins_count; -+ cfg->program->temp_count = cfg->temp_count; -+ -+ return VKD3D_OK; -+ -+fail: -+ vkd3d_free(cfg->instructions); -+ -+ return ret; -+} -+ -+enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) -+{ -+ enum vkd3d_result result = VKD3D_OK; -+ -+ remove_dcl_temps(program); -+ -+ if ((result = vsir_program_lower_texkills(program)) < 0) -+ return result; -+ -+ if (program->shader_version.major >= 6) -+ { -+ struct vsir_cfg cfg; -+ -+ if ((result = lower_switch_to_if_ladder(program)) < 0) -+ return result; -+ -+ if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0) -+ return result; -+ -+ if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) -+ return result; -+ -+ vsir_cfg_compute_dominators(&cfg); -+ -+ if ((result = vsir_cfg_compute_loops(&cfg)) < 0) -+ { -+ vsir_cfg_cleanup(&cfg); -+ return result; -+ } -+ -+ if ((result = vsir_cfg_sort_nodes(&cfg)) < 0) -+ { -+ vsir_cfg_cleanup(&cfg); -+ return result; -+ } -+ -+ if ((result = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) -+ { -+ vsir_cfg_cleanup(&cfg); -+ return result; -+ } -+ -+ if ((result = vsir_cfg_build_structured_program(&cfg)) < 0) -+ { -+ vsir_cfg_cleanup(&cfg); -+ return result; -+ } -+ -+ if ((result = 
vsir_cfg_emit_structured_program(&cfg)) < 0) -+ { - vsir_cfg_cleanup(&cfg); - return result; - } -@@ -3358,55 +4452,55 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, - } - else - { -- if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) - { -- if ((result = remap_output_signature(parser, compile_info)) < 0) -+ if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) - return result; - } - -- if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_HULL) -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) - { -- if ((result = instruction_array_flatten_hull_shader_phases(instructions)) < 0) -+ if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) - return result; - -- if ((result = instruction_array_normalise_hull_shader_control_point_io(instructions, -- &parser->shader_desc.input_signature)) < 0) -+ if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, -+ &program->input_signature)) < 0) - return result; - } - -- if ((result = shader_normalise_io_registers(parser)) < 0) -+ if ((result = vsir_program_normalise_io_registers(program)) < 0) - return result; - -- if ((result = instruction_array_normalise_flat_constants(&parser->program)) < 0) -+ if ((result = instruction_array_normalise_flat_constants(program)) < 0) - return result; - -- remove_dead_code(&parser->program); -+ remove_dead_code(program); - -- if ((result = normalise_combined_samplers(parser)) < 0) -+ if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) - return result; - } - -- if ((result = flatten_control_flow_constructs(parser)) < 0) -+ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) - return result; - - if (TRACE_ON()) -- vkd3d_shader_trace(&parser->program); -+ 
vkd3d_shader_trace(program); - -- if (!parser->failed && (result = vsir_validate(parser)) < 0) -+ if ((result = vsir_program_validate(program, config_flags, -+ compile_info->source_name, message_context)) < 0) - return result; - -- if (parser->failed) -- result = VKD3D_ERROR_INVALID_SHADER; -- - return result; - } - - struct validation_context - { -- struct vkd3d_shader_parser *parser; -+ struct vkd3d_shader_message_context *message_context; - const struct vsir_program *program; - size_t instruction_idx; -+ struct vkd3d_shader_location null_location; - bool invalid_instruction_idx; -+ enum vkd3d_result status; - bool dcl_temps_found; - enum vkd3d_shader_opcode phase; - enum cf_type -@@ -3452,16 +4546,21 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c - - if (ctx->invalid_instruction_idx) - { -- vkd3d_shader_parser_error(ctx->parser, error, "%s", buf.buffer); -+ vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); - ERR("VSIR validation error: %s\n", buf.buffer); - } - else - { -- vkd3d_shader_parser_error(ctx->parser, error, "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); -+ const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; -+ vkd3d_shader_error(ctx->message_context, &ins->location, error, -+ "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); - ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); - } - - vkd3d_string_buffer_cleanup(&buf); -+ -+ if (!ctx->status) -+ ctx->status = VKD3D_ERROR_INVALID_SHADER; - } - - static void vsir_validate_src_param(struct validation_context *ctx, -@@ -3515,10 +4614,10 @@ static void vsir_validate_register(struct validation_context *ctx, - if (reg->idx[0].rel_addr) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register."); - -- if (reg->idx[0].offset >= ctx->parser->program.temp_count) -+ if 
(reg->idx[0].offset >= ctx->program->temp_count) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.", -- reg->idx[0].offset, ctx->parser->program.temp_count); -+ reg->idx[0].offset, ctx->program->temp_count); - break; - } - -@@ -3606,7 +4705,7 @@ static void vsir_validate_register(struct validation_context *ctx, - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.", - reg->precision); - -- if (reg->data_type != VKD3D_DATA_UINT) -+ if (reg->data_type != VKD3D_DATA_UNUSED) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.", - reg->data_type); - -@@ -3708,7 +4807,7 @@ static void vsir_validate_dst_param(struct validation_context *ctx, - switch (dst->reg.type) - { - case VKD3DSPR_SSA: -- if (dst->reg.idx[0].offset < ctx->parser->program.ssa_count) -+ if (dst->reg.idx[0].offset < ctx->program->ssa_count) - { - struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; - -@@ -3761,7 +4860,7 @@ static void vsir_validate_src_param(struct validation_context *ctx, - switch (src->reg.type) - { - case VKD3DSPR_SSA: -- if (src->reg.idx[0].offset < ctx->parser->program.ssa_count) -+ if (src->reg.idx[0].offset < ctx->program->ssa_count) - { - struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; - unsigned int i; -@@ -3852,7 +4951,6 @@ static void vsir_validate_instruction(struct validation_context *ctx) - size_t i; - - instruction = &ctx->program->instructions.elements[ctx->instruction_idx]; -- ctx->parser->location = instruction->location; - - for (i = 0; i < instruction->dst_count; ++i) - vsir_validate_dst_param(ctx, &instruction->dst[i]); -@@ -4203,17 +5301,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) - } - } - --enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) -+enum vkd3d_result 
vsir_program_validate(struct vsir_program *program, uint64_t config_flags, -+ const char *source_name, struct vkd3d_shader_message_context *message_context) - { - struct validation_context ctx = - { -- .parser = parser, -- .program = &parser->program, -+ .message_context = message_context, -+ .program = program, -+ .null_location = {.source_name = source_name}, -+ .status = VKD3D_OK, - .phase = VKD3DSIH_INVALID, - }; - unsigned int i; - -- if (!(parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) -+ if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) - return VKD3D_OK; - - if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) -@@ -4222,7 +5323,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) - if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) - goto fail; - -- for (ctx.instruction_idx = 0; ctx.instruction_idx < parser->program.instructions.count; ++ctx.instruction_idx) -+ for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx) - vsir_validate_instruction(&ctx); - - ctx.invalid_instruction_idx = true; -@@ -4247,7 +5348,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) - vkd3d_free(ctx.temps); - vkd3d_free(ctx.ssas); - -- return VKD3D_OK; -+ return ctx.status; - - fail: - vkd3d_free(ctx.blocks); -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 5c87ff15503..673400efd69 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -223,6 +223,11 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d - } - } - -+static bool data_type_is_floating_point(enum vkd3d_data_type data_type) -+{ -+ return data_type == VKD3D_DATA_HALF || data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; -+} -+ - #define VKD3D_SPIRV_VERSION 0x00010000 - #define VKD3D_SPIRV_GENERATOR_ID 18 - #define 
VKD3D_SPIRV_GENERATOR_VERSION 11 -@@ -1524,6 +1529,19 @@ static uint32_t vkd3d_spirv_build_op_logical_equal(struct vkd3d_spirv_builder *b - SpvOpLogicalEqual, result_type, operand0, operand1); - } - -+static uint32_t vkd3d_spirv_build_op_logical_or(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t operand0, uint32_t operand1) -+{ -+ return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, -+ SpvOpLogicalOr, result_type, operand0, operand1); -+} -+ -+static uint32_t vkd3d_spirv_build_op_logical_not(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t operand) -+{ -+ return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpLogicalNot, result_type, operand); -+} -+ - static uint32_t vkd3d_spirv_build_op_convert_utof(struct vkd3d_spirv_builder *builder, - uint32_t result_type, uint32_t unsigned_value) - { -@@ -1825,6 +1843,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder - { - switch (data_type) - { -+ case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ - case VKD3D_DATA_FLOAT: - case VKD3D_DATA_SNORM: - case VKD3D_DATA_UNORM: -@@ -1832,6 +1851,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder - break; - case VKD3D_DATA_INT: - case VKD3D_DATA_UINT: -+ case VKD3D_DATA_UINT16: /* Minimum precision. 
TODO: native 16-bit */ - return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); - break; - case VKD3D_DATA_DOUBLE: -@@ -1940,6 +1960,9 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, - || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStorageImageArrayDynamicIndexing) - || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderNonUniformEXT)) - vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_descriptor_indexing"); -+ if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityFragmentShaderPixelInterlockEXT) -+ || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityFragmentShaderSampleInterlockEXT)) -+ vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_fragment_shader_interlock"); - if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStencilExportEXT)) - vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_shader_stencil_export"); - if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderViewportIndexLayerEXT)) -@@ -2346,6 +2369,7 @@ struct spirv_compiler - unsigned int output_control_point_count; - - bool use_vocp; -+ bool use_invocation_interlock; - bool emit_point_size; - - enum vkd3d_shader_opcode phase; -@@ -2427,14 +2451,14 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) - vkd3d_free(compiler); - } - --static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, -- struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, -+static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *program, -+ const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location, - uint64_t config_flags) - { -- const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; -- const 
struct shader_signature *output_signature = &shader_desc->output_signature; -+ const struct shader_signature *patch_constant_signature = &program->patch_constant_signature; -+ const struct shader_signature *output_signature = &program->output_signature; - const struct vkd3d_shader_interface_info *shader_interface; - const struct vkd3d_shader_descriptor_offset_info *offset_info; - const struct vkd3d_shader_spirv_target_info *target_info; -@@ -2545,7 +2569,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve - - rb_init(&compiler->symbol_table, vkd3d_symbol_compare); - -- compiler->shader_type = shader_version->type; -+ compiler->shader_type = program->shader_version.type; - - if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) - { -@@ -3736,6 +3760,70 @@ static uint32_t spirv_compiler_emit_bool_to_double(struct spirv_compiler *compil - return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); - } - -+/* Based on the implementation in the OpenGL Mathematics library. */ -+static uint32_t half_to_float(uint16_t value) -+{ -+ uint32_t s = (value & 0x8000u) << 16; -+ uint32_t e = (value >> 10) & 0x1fu; -+ uint32_t m = value & 0x3ffu; -+ -+ if (!e) -+ { -+ if (!m) -+ { -+ /* Plus or minus zero */ -+ return s; -+ } -+ else -+ { -+ /* Denormalized number -- renormalize it */ -+ -+ while (!(m & 0x400u)) -+ { -+ m <<= 1; -+ --e; -+ } -+ -+ ++e; -+ m &= ~0x400u; -+ } -+ } -+ else if (e == 31u) -+ { -+ /* Positive or negative infinity for zero 'm'. -+ * Nan for non-zero 'm' -- preserve sign and significand bits */ -+ return s | 0x7f800000u | (m << 13); -+ } -+ -+ /* Normalized number */ -+ e += 127u - 15u; -+ m <<= 13; -+ -+ /* Assemble s, e and m. */ -+ return s | (e << 23) | m; -+} -+ -+static uint32_t convert_raw_constant32(enum vkd3d_data_type data_type, unsigned int uint_value) -+{ -+ int16_t i; -+ -+ /* TODO: native 16-bit support. 
*/ -+ if (data_type != VKD3D_DATA_UINT16 && data_type != VKD3D_DATA_HALF) -+ return uint_value; -+ -+ if (data_type == VKD3D_DATA_HALF) -+ return half_to_float(uint_value); -+ -+ /* Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or -+ * should not be sign-extended when 16-bit execution is not supported. The AMD RX 580 Windows -+ * driver has no 16-bit support, and sign-extends all 16-bit constant ints to 32 bits. These -+ * results differ from SM 5. The RX 6750 XT supports 16-bit execution, so constants are not -+ * extended, and results match SM 5. It seems best to replicate the sign-extension, and if -+ * execution is 16-bit, the values will be truncated. */ -+ i = uint_value; -+ return (int32_t)i; -+} -+ - static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) - { -@@ -3748,14 +3836,15 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile - if (reg->dimension == VSIR_DIMENSION_SCALAR) - { - for (i = 0; i < component_count; ++i) -- values[i] = *reg->u.immconst_u32; -+ values[i] = convert_raw_constant32(reg->data_type, reg->u.immconst_u32[0]); - } - else - { - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) -- values[j++] = reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]; -+ values[j++] = convert_raw_constant32(reg->data_type, -+ reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]); - } - } - -@@ -3899,6 +3988,13 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil - - switch (icb->data_type) - { -+ case VKD3D_DATA_HALF: -+ case VKD3D_DATA_UINT16: -+ /* Scalar only. 
*/ -+ for (i = 0; i < element_count; ++i) -+ elements[i] = vkd3d_spirv_get_op_constant(builder, elem_type_id, -+ convert_raw_constant32(icb->data_type, icb->data[i])); -+ break; - case VKD3D_DATA_FLOAT: - case VKD3D_DATA_INT: - case VKD3D_DATA_UINT: -@@ -4087,7 +4183,7 @@ static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, - uint32_t type_id; - - type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); -- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) -+ if (data_type_is_floating_point(reg->data_type)) - return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id); - - FIXME("Unhandled data type %#x.\n", reg->data_type); -@@ -4101,7 +4197,7 @@ static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, - uint32_t type_id; - - type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); -- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) -+ if (data_type_is_floating_point(reg->data_type)) - return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); - else if (data_type_is_integer(reg->data_type)) - return vkd3d_spirv_build_op_snegate(builder, type_id, val_id); -@@ -4285,7 +4381,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, - } - - type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); -- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) -+ if (data_type_is_floating_point(reg->data_type)) - return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id); - - FIXME("Unhandled data type %#x.\n", reg->data_type); -@@ -6272,9 +6368,24 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) - vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); - -- if (d->uav_flags & VKD3DSUF_GLOBALLY_COHERENT) -+ /* ROVs are 
implicitly globally coherent. */ -+ if (d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW)) - vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationCoherent, NULL, 0); - -+ if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) -+ { -+ if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, -+ "Rasteriser-ordered views are only supported in fragment shaders."); -+ else if (!spirv_compiler_is_target_extension_supported(compiler, -+ VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK)) -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, -+ "Cannot enable fragment shader interlock. " -+ "The target environment does not support fragment shader interlock."); -+ else -+ compiler->use_invocation_interlock = true; -+ } -+ - if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) - { - assert(structure_stride); /* counters are valid only for structured buffers */ -@@ -6324,20 +6435,26 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - } - - static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, -- const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) -+ const struct vkd3d_shader_register *reg, unsigned int alignment, unsigned int size, -+ unsigned int structure_stride, bool zero_init) - { -- uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id; -+ uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const SpvStorageClass storage_class = SpvStorageClassWorkgroup; - struct vkd3d_symbol reg_symbol; - -+ /* Alignment is supported only in the Kernel execution model. 
*/ -+ if (alignment) -+ TRACE("Ignoring alignment %u.\n", alignment); -+ - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - length_id = spirv_compiler_get_constant_uint(compiler, size); - array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); - - pointer_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, array_type_id); -+ init_id = zero_init ? vkd3d_spirv_get_op_constant_null(builder, array_type_id) : 0; - var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, -- pointer_type_id, storage_class, 0); -+ pointer_type_id, storage_class, init_id); - - spirv_compiler_emit_register_debug_name(builder, var_id, reg); - -@@ -6352,8 +6469,8 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { - const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; -- spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, -- tgsm_raw->byte_count / 4, 0); -+ spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, tgsm_raw->alignment, -+ tgsm_raw->byte_count / 4, 0, tgsm_raw->zero_init); - } - - static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compiler, -@@ -6361,8 +6478,8 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi - { - const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; - unsigned int stride = tgsm_structured->byte_stride / 4; -- spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, -- tgsm_structured->structure_count * stride, stride); -+ spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, tgsm_structured->alignment, -+ tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); - } - - static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, -@@ -6871,7 +6988,7 @@ static void 
spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, - assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); - - val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -- if (dst->reg.data_type == VKD3D_DATA_FLOAT) -+ if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) - { - val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); - } -@@ -6880,7 +6997,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, - /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ - val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); - } -- else if (dst->reg.data_type == VKD3D_DATA_UINT) -+ else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) - { - val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); - } -@@ -6909,6 +7026,15 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - SpvOp op = SpvOpMax; - unsigned int i; - -+ if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) -+ { -+ /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ -+ FIXME("Unsupported 64-bit source for bit count.\n"); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -+ "64-bit source for bit count is not supported."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ - if (src->reg.data_type == VKD3D_DATA_BOOL) - { - if (dst->reg.data_type == VKD3D_DATA_BOOL) -@@ -7049,6 +7175,16 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp - unsigned int i, component_count; - enum GLSLstd450 glsl_inst; - -+ if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI -+ || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) -+ { -+ /* At least some drivers support this anyway, but if validation is enabled it will fail. */ -+ FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -+ "64-bit source for handler %#x is not supported.", instruction->handler_idx); -+ return; -+ } -+ - glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); - if (glsl_inst == GLSLstd450Bad) - { -@@ -7093,8 +7229,8 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, - struct vkd3d_shader_register_info dst_reg_info, src_reg_info; - const struct vkd3d_shader_dst_param *dst = instruction->dst; - const struct vkd3d_shader_src_param *src = instruction->src; -+ unsigned int i, component_count, write_mask; - uint32_t components[VKD3D_VEC4_SIZE]; -- unsigned int i, component_count; - - if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA - || dst->modifiers || src->modifiers) -@@ -7145,7 +7281,9 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, - } - - general_implementation: -- val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -+ write_mask = (src->reg.type == VKD3DSPR_IMMCONST64 && 
!data_type_is_64_bit(dst->reg.data_type)) -+ ? vsir_write_mask_64_from_32(dst->write_mask) : dst->write_mask; -+ val_id = spirv_compiler_emit_load_src(compiler, src, write_mask); - if (dst->reg.data_type != src->reg.data_type) - { - val_id = vkd3d_spirv_build_op_bitcast(builder, vkd3d_spirv_get_type_id_for_data_type(builder, -@@ -7171,8 +7309,15 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, - type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); - - if (src[0].reg.data_type != VKD3D_DATA_BOOL) -- condition_id = spirv_compiler_emit_int_to_bool(compiler, -- VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); -+ { -+ if (instruction->handler_idx == VKD3DSIH_CMP) -+ condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, -+ vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, -+ spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); -+ else -+ condition_id = spirv_compiler_emit_int_to_bool(compiler, -+ VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); -+ } - val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, src1_id, src2_id); - - spirv_compiler_emit_store_dst(compiler, dst, val_id); -@@ -7335,7 +7480,7 @@ static void spirv_compiler_emit_imad(struct spirv_compiler *compiler, - unsigned int i, component_count; - - component_count = vsir_write_mask_component_count(dst->write_mask); -- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, component_count); -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); - - for (i = 0; i < ARRAY_SIZE(src_ids); ++i) - src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], dst->write_mask); -@@ -7684,6 +7829,56 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co - spirv_compiler_emit_store_reg(compiler, 
&dst->reg, dst->write_mask, result_id); - } - -+static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, src0_id, src1_id, val_id; -+ -+ type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); -+ src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); -+ src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); -+ /* OpOrdered and OpUnordered are only available in Kernel mode. */ -+ src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); -+ src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); -+ val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); -+ if (instruction->handler_idx == VKD3DSIH_ORD) -+ val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t src0_id, src1_id, type_id, result_id; -+ unsigned int component_count; -+ SpvOp op; -+ -+ switch (instruction->handler_idx) -+ { -+ case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; -+ case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ component_count = vsir_write_mask_component_count(dst->write_mask); -+ -+ src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); -+ src1_id = spirv_compiler_emit_load_src(compiler, &src[1], 
dst->write_mask); -+ -+ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); -+ result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, src0_id, src1_id); -+ -+ result_id = spirv_compiler_emit_bool_to_float(compiler, component_count, result_id, false); -+ spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); -+} -+ - static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction, uint32_t target_block_id) - { -@@ -7702,11 +7897,31 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co - return merge_block_id; - } - -+static void spirv_compiler_end_invocation_interlock(struct spirv_compiler *compiler) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ -+ if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilitySampleRateShading)) -+ { -+ spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeSampleInterlockOrderedEXT, NULL, 0); -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityFragmentShaderSampleInterlockEXT); -+ } -+ else -+ { -+ spirv_compiler_emit_execution_mode(compiler, SpvExecutionModePixelInterlockOrderedEXT, NULL, 0); -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityFragmentShaderPixelInterlockEXT); -+ } -+ vkd3d_spirv_build_op(&builder->function_stream, SpvOpEndInvocationInterlockEXT); -+} -+ - static void spirv_compiler_emit_return(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - -+ if (compiler->use_invocation_interlock) -+ spirv_compiler_end_invocation_interlock(compiler); -+ - if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (is_in_default_phase(compiler) - || is_in_control_point_phase(compiler))) - spirv_compiler_emit_shader_epilogue_invocation(compiler); -@@ -9475,6 +9690,11 @@ static void 
spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) - - if (compiler->emit_point_size) - spirv_compiler_emit_point_size(compiler); -+ -+ /* Maybe in the future we can try to shrink the size of the interlocked -+ * section. */ -+ if (compiler->use_invocation_interlock) -+ vkd3d_spirv_build_op(&compiler->spirv_builder.function_stream, SpvOpBeginInvocationInterlockEXT); - } - - static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, -@@ -9549,6 +9769,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - break; - case VKD3DSIH_DMOVC: - case VKD3DSIH_MOVC: -+ case VKD3DSIH_CMP: - spirv_compiler_emit_movc(compiler, instruction); - break; - case VKD3DSIH_SWAPC: -@@ -9669,6 +9890,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_ULT: - spirv_compiler_emit_comparison_instruction(compiler, instruction); - break; -+ case VKD3DSIH_ORD: -+ case VKD3DSIH_UNO: -+ spirv_compiler_emit_orderedness_instruction(compiler, instruction); -+ break; -+ case VKD3DSIH_SLT: -+ case VKD3DSIH_SGE: -+ spirv_compiler_emit_float_comparison_instruction(compiler, instruction); -+ break; - case VKD3DSIH_BFI: - case VKD3DSIH_IBFE: - case VKD3DSIH_UBFE: -@@ -9899,13 +10128,13 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; - const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; - struct vkd3d_shader_instruction_array instructions; - struct vsir_program *program = &parser->program; - enum vkd3d_result result = VKD3D_OK; - unsigned int i; - -- if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) -+ if ((result = vsir_program_normalise(program, compiler->config_flags, -+ compile_info, compiler->message_context)) < 0) - return 
result; - - if (program->temp_count) -@@ -9924,12 +10153,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - instructions = program->instructions; - memset(&program->instructions, 0, sizeof(program->instructions)); - -- compiler->input_signature = shader_desc->input_signature; -- compiler->output_signature = shader_desc->output_signature; -- compiler->patch_constant_signature = shader_desc->patch_constant_signature; -- memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); -- memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); -- memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); -+ compiler->input_signature = program->input_signature; -+ compiler->output_signature = program->output_signature; -+ compiler->patch_constant_signature = program->patch_constant_signature; -+ memset(&program->input_signature, 0, sizeof(program->input_signature)); -+ memset(&program->output_signature, 0, sizeof(program->output_signature)); -+ memset(&program->patch_constant_signature, 0, sizeof(program->patch_constant_signature)); - compiler->use_vocp = program->use_vocp; - compiler->block_names = program->block_names; - compiler->block_name_count = program->block_name_count; -@@ -10036,8 +10265,8 @@ int spirv_compile(struct vkd3d_shader_parser *parser, - struct spirv_compiler *spirv_compiler; - int ret; - -- if (!(spirv_compiler = spirv_compiler_create(&parser->program.shader_version, &parser->shader_desc, -- compile_info, scan_descriptor_info, message_context, &parser->location, parser->config_flags))) -+ if (!(spirv_compiler = spirv_compiler_create(&parser->program, compile_info, -+ scan_descriptor_info, message_context, &parser->location, parser->config_flags))) - { - ERR("Failed to create SPIR-V compiler.\n"); - return VKD3D_ERROR; -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 3be4e40ab0c..cb4f6d4ddbf 100644 
---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -954,32 +954,32 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins - case VKD3DSPR_INCONTROLPOINT: - io_masks = priv->input_register_masks; - ranges = &priv->input_index_ranges; -- signature = &priv->p.shader_desc.input_signature; -+ signature = &priv->p.program.input_signature; - break; - case VKD3DSPR_OUTPUT: - if (sm4_parser_is_in_fork_or_join_phase(priv)) - { - io_masks = priv->patch_constant_register_masks; - ranges = &priv->patch_constant_index_ranges; -- signature = &priv->p.shader_desc.patch_constant_signature; -+ signature = &priv->p.program.patch_constant_signature; - } - else - { - io_masks = priv->output_register_masks; - ranges = &priv->output_index_ranges; -- signature = &priv->p.shader_desc.output_signature; -+ signature = &priv->p.program.output_signature; - } - break; - case VKD3DSPR_COLOROUT: - case VKD3DSPR_OUTCONTROLPOINT: - io_masks = priv->output_register_masks; - ranges = &priv->output_index_ranges; -- signature = &priv->p.shader_desc.output_signature; -+ signature = &priv->p.program.output_signature; - break; - case VKD3DSPR_PATCHCONST: - io_masks = priv->patch_constant_register_masks; - ranges = &priv->patch_constant_index_ranges; -- signature = &priv->p.shader_desc.patch_constant_signature; -+ signature = &priv->p.program.patch_constant_signature; - break; - - default: -@@ -1113,7 +1113,7 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u - if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) - { - struct signature_element *e = vsir_signature_find_element_for_reg( -- &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); -+ &priv->p.program.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); - - e->interpolation_mode = ins->flags; - } -@@ -1128,7 +1128,7 @@ static void 
shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in - if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) - { - struct signature_element *e = vsir_signature_find_element_for_reg( -- &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); -+ &priv->p.program.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); - - e->interpolation_mode = ins->flags; - } -@@ -1263,6 +1263,7 @@ static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, u - ins->declaration.tgsm_raw.byte_count = *tokens; - if (ins->declaration.tgsm_raw.byte_count % 4) - FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); -+ ins->declaration.tgsm_raw.zero_init = false; - } - - static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1274,6 +1275,7 @@ static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction - ins->declaration.tgsm_structured.structure_count = *tokens; - if (ins->declaration.tgsm_structured.byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); -+ ins->declaration.tgsm_structured.zero_init = false; - } - - static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1748,7 +1750,6 @@ static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - - vsir_program_cleanup(&parser->program); -- free_shader_desc(&parser->shader_desc); - vkd3d_free(sm4); - } - -@@ -2504,7 +2505,7 @@ static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = - }; - - static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, -- size_t byte_code_size, const char *source_name, const struct shader_signature *output_signature, -+ size_t 
byte_code_size, const char *source_name, - struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_version version; -@@ -2648,9 +2649,9 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) - { - struct vkd3d_shader_instruction_array *instructions; -- struct vkd3d_shader_desc *shader_desc; - struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_sm4_parser *sm4; -+ struct dxbc_shader_desc dxbc_desc = {0}; - int ret; - - if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) -@@ -2659,36 +2660,40 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - return VKD3D_ERROR_OUT_OF_MEMORY; - } - -- shader_desc = &sm4->p.shader_desc; -- shader_desc->is_dxil = false; -+ dxbc_desc.is_dxil = false; - if ((ret = shader_extract_from_dxbc(&compile_info->source, -- message_context, compile_info->source_name, shader_desc)) < 0) -+ message_context, compile_info->source_name, &dxbc_desc)) < 0) - { - WARN("Failed to extract shader, vkd3d result %d.\n", ret); - vkd3d_free(sm4); - return ret; - } - -- if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, -- compile_info->source_name, &shader_desc->output_signature, message_context)) -+ if (!shader_sm4_init(sm4, dxbc_desc.byte_code, dxbc_desc.byte_code_size, -+ compile_info->source_name, message_context)) - { - WARN("Failed to initialise shader parser.\n"); -- free_shader_desc(shader_desc); -+ free_dxbc_shader_desc(&dxbc_desc); - vkd3d_free(sm4); - return VKD3D_ERROR_INVALID_ARGUMENT; - } - -+ sm4->p.program.input_signature = dxbc_desc.input_signature; -+ sm4->p.program.output_signature = dxbc_desc.output_signature; -+ sm4->p.program.patch_constant_signature = dxbc_desc.patch_constant_signature; -+ memset(&dxbc_desc, 0, sizeof(dxbc_desc)); -+ - /* DXBC stores used masks inverted for output signatures, for some reason. 
- * We return them un-inverted. */ -- uninvert_used_masks(&shader_desc->output_signature); -+ uninvert_used_masks(&sm4->p.program.output_signature); - if (sm4->p.program.shader_version.type == VKD3D_SHADER_TYPE_HULL) -- uninvert_used_masks(&shader_desc->patch_constant_signature); -+ uninvert_used_masks(&sm4->p.program.patch_constant_signature); - -- if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature, -+ if (!shader_sm4_parser_validate_signature(sm4, &sm4->p.program.input_signature, - sm4->input_register_masks, "Input") -- || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature, -+ || !shader_sm4_parser_validate_signature(sm4, &sm4->p.program.output_signature, - sm4->output_register_masks, "Output") -- || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature, -+ || !shader_sm4_parser_validate_signature(sm4, &sm4->p.program.patch_constant_signature, - sm4->patch_constant_register_masks, "Patch constant")) - { - shader_sm4_destroy(&sm4->p); -@@ -2721,7 +2726,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - shader_sm4_validate_default_phase_index_ranges(sm4); - - if (!sm4->p.failed) -- vsir_validate(&sm4->p); -+ vkd3d_shader_parser_validate(&sm4->p); - - if (sm4->p.failed) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 4f400d19f6f..81ac84896d4 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -23,6 +23,8 @@ - #include - #include - -+/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ -+ - static inline int char_to_int(char c) - { - if ('0' <= c && c <= '9') -@@ -71,8 +73,16 @@ void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer) - - void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer) - { -- buffer->buffer[0] = '\0'; -- buffer->content_size = 0; -+ 
vkd3d_string_buffer_truncate(buffer, 0); -+} -+ -+void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size) -+{ -+ if (size < buffer->content_size) -+ { -+ buffer->buffer[size] = '\0'; -+ buffer->content_size = size; -+ } - } - - static bool vkd3d_string_buffer_resize(struct vkd3d_string_buffer *buffer, int rc) -@@ -224,6 +234,16 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *cache, struct - cache->buffers[cache->count++] = buffer; - } - -+void vkd3d_shader_code_from_string_buffer(struct vkd3d_shader_code *code, struct vkd3d_string_buffer *buffer) -+{ -+ code->code = buffer->buffer; -+ code->size = buffer->content_size; -+ -+ buffer->buffer = NULL; -+ buffer->buffer_size = 0; -+ buffer->content_size = 0; -+} -+ - void vkd3d_shader_message_context_init(struct vkd3d_shader_message_context *context, - enum vkd3d_shader_log_level log_level) - { -@@ -1438,11 +1458,11 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - - if (!ret && signature_info) - { -- if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) -+ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->program.input_signature) - || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, -- &parser->shader_desc.output_signature) -+ &parser->program.output_signature) - || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, -- &parser->shader_desc.patch_constant_signature)) -+ &parser->program.patch_constant_signature)) - { - ret = VKD3D_ERROR_OUT_OF_MEMORY; - } -@@ -1470,60 +1490,6 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - return ret; - } - --static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = 
vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) -- { -- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = scan_with_parser(compile_info, message_context, NULL, parser); -- vkd3d_shader_parser_destroy(parser); -- -- return ret; --} -- --static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) -- { -- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = scan_with_parser(compile_info, message_context, NULL, parser); -- vkd3d_shader_parser_destroy(parser); -- -- return ret; --} -- --static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) -- { -- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = scan_with_parser(compile_info, message_context, NULL, parser); -- vkd3d_shader_parser_destroy(parser); -- -- return ret; --} -- - int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) - { - struct vkd3d_shader_message_context message_context; -@@ -1543,29 +1509,44 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - - vkd3d_shader_dump_shader(compile_info); - -- switch (compile_info->source_type) -+ if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) - { -- case VKD3D_SHADER_SOURCE_DXBC_TPF: -- ret = scan_dxbc(compile_info, &message_context); -- break; -+ FIXME("HLSL support not implemented.\n"); -+ ret = VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ else -+ { -+ struct vkd3d_shader_parser *parser; - -- case VKD3D_SHADER_SOURCE_HLSL: -- FIXME("HLSL support 
not implemented.\n"); -- ret = VKD3D_ERROR_NOT_IMPLEMENTED; -- break; -+ switch (compile_info->source_type) -+ { -+ case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -+ ret = vkd3d_shader_sm1_parser_create(compile_info, &message_context, &parser); -+ break; - -- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -- ret = scan_d3dbc(compile_info, &message_context); -- break; -+ case VKD3D_SHADER_SOURCE_DXBC_TPF: -+ ret = vkd3d_shader_sm4_parser_create(compile_info, &message_context, &parser); -+ break; - -- case VKD3D_SHADER_SOURCE_DXBC_DXIL: -- ret = scan_dxil(compile_info, &message_context); -- break; -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = vkd3d_shader_sm6_parser_create(compile_info, &message_context, &parser); -+ break; - -- default: -- ERR("Unsupported source type %#x.\n", compile_info->source_type); -- ret = VKD3D_ERROR_INVALID_ARGUMENT; -- break; -+ default: -+ ERR("Unsupported source type %#x.\n", compile_info->source_type); -+ ret = VKD3D_ERROR_INVALID_ARGUMENT; -+ break; -+ } -+ -+ if (ret < 0) -+ { -+ WARN("Failed to create shader parser.\n"); -+ } -+ else -+ { -+ ret = scan_with_parser(compile_info, &message_context, NULL, parser); -+ vkd3d_shader_parser_destroy(parser); -+ } - } - - vkd3d_shader_message_context_trace_messages(&message_context); -@@ -1580,7 +1561,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; -- struct vkd3d_glsl_generator *glsl_generator; -+ struct vsir_program *program = &parser->program; - struct vkd3d_shader_compile_info scan_info; - int ret; - -@@ -1589,22 +1570,13 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - switch (compile_info->target_type) - { - case VKD3D_SHADER_TARGET_D3D_ASM: -- ret = vkd3d_dxbc_binary_to_text(&parser->program, compile_info, out, VSIR_ASM_D3D); -+ ret = d3d_asm_compile(program, compile_info, out, 
VSIR_ASM_FLAG_NONE); - break; - - case VKD3D_SHADER_TARGET_GLSL: - if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) - return ret; -- if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->program.shader_version, -- message_context, &parser->location))) -- { -- ERR("Failed to create GLSL generator.\n"); -- vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); -- return VKD3D_ERROR; -- } -- -- ret = vkd3d_glsl_generator_generate(glsl_generator, &parser->program, out); -- vkd3d_glsl_generator_destroy(glsl_generator); -+ ret = glsl_compile(program, out, message_context); - vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - break; - -@@ -1624,24 +1596,6 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - return ret; - } - --static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) -- { -- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); -- -- vkd3d_shader_parser_destroy(parser); -- return ret; --} -- - static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { -@@ -1657,42 +1611,6 @@ static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, - return ret; - } - --static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) -- { 
-- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); -- -- vkd3d_shader_parser_destroy(parser); -- return ret; --} -- --static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) -- { -- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); -- -- vkd3d_shader_parser_destroy(parser); -- return ret; --} -- - int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, char **messages) - { -@@ -1713,26 +1631,43 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - - vkd3d_shader_dump_shader(compile_info); - -- switch (compile_info->source_type) -+ if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) - { -- case VKD3D_SHADER_SOURCE_DXBC_TPF: -- ret = compile_dxbc_tpf(compile_info, out, &message_context); -- break; -+ ret = compile_hlsl(compile_info, out, &message_context); -+ } -+ else -+ { -+ struct vkd3d_shader_parser *parser; - -- case VKD3D_SHADER_SOURCE_HLSL: -- ret = compile_hlsl(compile_info, out, &message_context); -- break; -+ switch (compile_info->source_type) -+ { -+ case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -+ ret = vkd3d_shader_sm1_parser_create(compile_info, &message_context, &parser); -+ break; - -- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -- ret = compile_d3d_bytecode(compile_info, out, &message_context); -- break; -+ case VKD3D_SHADER_SOURCE_DXBC_TPF: -+ ret = vkd3d_shader_sm4_parser_create(compile_info, &message_context, &parser); -+ break; - -- case VKD3D_SHADER_SOURCE_DXBC_DXIL: -- ret = 
compile_dxbc_dxil(compile_info, out, &message_context); -- break; -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = vkd3d_shader_sm6_parser_create(compile_info, &message_context, &parser); -+ break; - -- default: -- vkd3d_unreachable(); -+ default: -+ ERR("Unsupported source type %#x.\n", compile_info->source_type); -+ ret = VKD3D_ERROR_INVALID_ARGUMENT; -+ break; -+ } -+ -+ if (ret < 0) -+ { -+ WARN("Failed to create shader parser.\n"); -+ } -+ else -+ { -+ ret = vkd3d_shader_parser_compile(parser, compile_info, out, &message_context); -+ vkd3d_shader_parser_destroy(parser); -+ } - } - - vkd3d_shader_message_context_trace_messages(&message_context); -@@ -1937,7 +1872,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - VKD3D_SHADER_TARGET_SPIRV_TEXT, - #endif - VKD3D_SHADER_TARGET_D3D_ASM, --#if 0 -+#ifdef VKD3D_SHADER_UNSUPPORTED_GLSL - VKD3D_SHADER_TARGET_GLSL, - #endif - }; -@@ -1958,13 +1893,21 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - VKD3D_SHADER_TARGET_D3D_ASM, - }; - -+#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -+ static const enum vkd3d_shader_target_type dxbc_dxil_types[] = -+ { -+ VKD3D_SHADER_TARGET_SPIRV_BINARY, -+# ifdef HAVE_SPIRV_TOOLS -+ VKD3D_SHADER_TARGET_SPIRV_TEXT, -+# endif -+ VKD3D_SHADER_TARGET_D3D_ASM, -+ }; -+#endif -+ - TRACE("source_type %#x, count %p.\n", source_type, count); - - switch (source_type) - { --#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -- case VKD3D_SHADER_SOURCE_DXBC_DXIL: --#endif - case VKD3D_SHADER_SOURCE_DXBC_TPF: - *count = ARRAY_SIZE(dxbc_tpf_types); - return dxbc_tpf_types; -@@ -1977,6 +1920,12 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - *count = ARRAY_SIZE(d3dbc_types); - return d3dbc_types; - -+#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ *count = ARRAY_SIZE(dxbc_dxil_types); -+ return dxbc_dxil_types; -+#endif -+ - default: - *count = 0; - return NULL; -diff --git 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 2d3b3254638..a33b6d2d967 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -148,6 +148,8 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, - VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, - VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, -+ VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, -+ VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, - - VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, - VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, -@@ -445,6 +447,7 @@ enum vkd3d_shader_opcode - VKD3DSIH_NOT, - VKD3DSIH_NRM, - VKD3DSIH_OR, -+ VKD3DSIH_ORD, - VKD3DSIH_PHASE, - VKD3DSIH_PHI, - VKD3DSIH_POW, -@@ -516,6 +519,7 @@ enum vkd3d_shader_opcode - VKD3DSIH_UMAX, - VKD3DSIH_UMIN, - VKD3DSIH_UMUL, -+ VKD3DSIH_UNO, - VKD3DSIH_USHR, - VKD3DSIH_UTOD, - VKD3DSIH_UTOF, -@@ -620,14 +624,16 @@ enum vkd3d_data_type - VKD3D_DATA_UINT8, - VKD3D_DATA_UINT64, - VKD3D_DATA_BOOL, -+ VKD3D_DATA_UINT16, -+ VKD3D_DATA_HALF, - - VKD3D_DATA_COUNT, - }; - - static inline bool data_type_is_integer(enum vkd3d_data_type data_type) - { -- return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT -- || data_type == VKD3D_DATA_UINT64; -+ return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT16 -+ || data_type == VKD3D_DATA_UINT || data_type == VKD3D_DATA_UINT64; - } - - static inline bool data_type_is_bool(enum vkd3d_data_type data_type) -@@ -808,6 +814,8 @@ enum vkd3d_shader_type - VKD3D_SHADER_TYPE_COUNT, - }; - -+struct vkd3d_shader_message_context; -+ - struct vkd3d_shader_version - { - enum vkd3d_shader_type type; -@@ -1025,7 +1033,7 @@ struct signature_element *vsir_signature_find_element_for_reg(const struct shade - unsigned int reg_idx, unsigned int write_mask); 
- void shader_signature_cleanup(struct shader_signature *signature); - --struct vkd3d_shader_desc -+struct dxbc_shader_desc - { - const uint32_t *byte_code; - size_t byte_code_size; -@@ -1033,7 +1041,10 @@ struct vkd3d_shader_desc - struct shader_signature input_signature; - struct shader_signature output_signature; - struct shader_signature patch_constant_signature; -+}; - -+struct vkd3d_shader_desc -+{ - struct - { - uint32_t used, external; -@@ -1079,14 +1090,18 @@ struct vkd3d_shader_tgsm - struct vkd3d_shader_tgsm_raw - { - struct vkd3d_shader_dst_param reg; -+ unsigned int alignment; - unsigned int byte_count; -+ bool zero_init; - }; - - struct vkd3d_shader_tgsm_structured - { - struct vkd3d_shader_dst_param reg; -+ unsigned int alignment; - unsigned int byte_stride; - unsigned int structure_count; -+ bool zero_init; - }; - - struct vkd3d_shader_thread_group_size -@@ -1290,6 +1305,10 @@ struct vsir_program - struct vkd3d_shader_version shader_version; - struct vkd3d_shader_instruction_array instructions; - -+ struct shader_signature input_signature; -+ struct shader_signature output_signature; -+ struct shader_signature patch_constant_signature; -+ - unsigned int input_control_point_count, output_control_point_count; - unsigned int block_count; - unsigned int temp_count; -@@ -1302,6 +1321,10 @@ struct vsir_program - - bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); - void vsir_program_cleanup(struct vsir_program *program); -+enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); -+enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, -+ const char *source_name, struct vkd3d_shader_message_context *message_context); - - static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( - struct 
vsir_program *program, unsigned int count) -@@ -1347,6 +1370,12 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse - parser->ops->parser_destroy(parser); - } - -+static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser) -+{ -+ return vsir_program_validate(&parser->program, parser->config_flags, -+ parser->location.source_name, parser->message_context); -+} -+ - struct vkd3d_shader_descriptor_info1 - { - enum vkd3d_shader_descriptor_type type; -@@ -1385,21 +1414,22 @@ struct vkd3d_string_buffer_cache - size_t count, max_count, capacity; - }; - --enum vsir_asm_dialect -+enum vsir_asm_flags - { -- VSIR_ASM_VSIR, -- VSIR_ASM_D3D, -+ VSIR_ASM_FLAG_NONE = 0, -+ VSIR_ASM_FLAG_DUMP_TYPES = 0x1, - }; - --enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, -+enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, enum vsir_asm_dialect dialect); -+ struct vkd3d_shader_code *out, enum vsir_asm_flags flags); - void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); - struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); - void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); - void vkd3d_string_buffer_cache_cleanup(struct vkd3d_string_buffer_cache *list); - void vkd3d_string_buffer_cache_init(struct vkd3d_string_buffer_cache *list); - void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer); -+void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size); - int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f); - int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d); - int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) 
VKD3D_PRINTF_FUNC(2, 3); -@@ -1408,6 +1438,7 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *list, struct - vkd3d_string_buffer_trace_(buffer, __FUNCTION__) - void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function); - int vkd3d_string_buffer_vprintf(struct vkd3d_string_buffer *buffer, const char *format, va_list args); -+void vkd3d_shader_code_from_string_buffer(struct vkd3d_shader_code *code, struct vkd3d_string_buffer *buffer); - - struct vkd3d_bytecode_buffer - { -@@ -1483,20 +1514,15 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); - --void free_shader_desc(struct vkd3d_shader_desc *desc); -+void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); - - int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, -- struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); -+ struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc); - int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); - --struct vkd3d_glsl_generator; -- --struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, -- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); --int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, -- struct vsir_program *program, struct vkd3d_shader_code *out); --void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); -+int glsl_compile(struct vsir_program *program, struct vkd3d_shader_code *out, -+ struct vkd3d_shader_message_context 
*message_context); - - #define SPIRV_MAX_SRC_COUNT 6 - -@@ -1513,17 +1539,17 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, - int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - --enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser); -- - static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_type( - enum vkd3d_data_type data_type) - { - switch (data_type) - { -+ case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ - case VKD3D_DATA_FLOAT: - case VKD3D_DATA_UNORM: - case VKD3D_DATA_SNORM: - return VKD3D_SHADER_COMPONENT_FLOAT; -+ case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ - case VKD3D_DATA_UINT: - return VKD3D_SHADER_COMPONENT_UINT; - case VKD3D_DATA_INT: -@@ -1760,7 +1786,4 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void - void dxbc_writer_init(struct dxbc_writer *dxbc); - int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); - --enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_compile_info *compile_info); -- - #endif /* __VKD3D_SHADER_PRIVATE_H */ -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 17c7ccb3e31..7841a811bf7 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -94,6 +94,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = - VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), - VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), - VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), -+ VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), - VK_EXTENSION(EXT_MUTABLE_DESCRIPTOR_TYPE, EXT_mutable_descriptor_type), - 
VK_EXTENSION(EXT_ROBUSTNESS_2, EXT_robustness2), - VK_EXTENSION(EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, EXT_shader_demote_to_helper_invocation), -@@ -789,6 +790,7 @@ struct vkd3d_physical_device_info - VkPhysicalDeviceConditionalRenderingFeaturesEXT conditional_rendering_features; - VkPhysicalDeviceDepthClipEnableFeaturesEXT depth_clip_features; - VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features; -+ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_features; - VkPhysicalDeviceRobustness2FeaturesEXT robustness2_features; - VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote_features; - VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features; -@@ -808,6 +810,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; - VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; - VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; -+ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; - VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; - VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; - VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; -@@ -825,6 +828,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - conditional_rendering_features = &info->conditional_rendering_features; - depth_clip_features = &info->depth_clip_features; - descriptor_indexing_features = &info->descriptor_indexing_features; -+ fragment_shader_interlock_features = &info->fragment_shader_interlock_features; - robustness2_features = &info->robustness2_features; - descriptor_indexing_properties = &info->descriptor_indexing_properties; - maintenance3_properties = &info->maintenance3_properties; -@@ -846,6 +850,8 @@ static void 
vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - vk_prepend_struct(&info->features2, depth_clip_features); - descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; - vk_prepend_struct(&info->features2, descriptor_indexing_features); -+ fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, fragment_shader_interlock_features); - robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - vk_prepend_struct(&info->features2, robustness2_features); - demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; -@@ -1158,6 +1164,7 @@ static void vkd3d_trace_physical_device_limits(const struct vkd3d_physical_devic - - static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_device_info *info) - { -+ const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; - const VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; - const VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; - const VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; -@@ -1279,6 +1286,15 @@ static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_dev - TRACE(" VkPhysicalDeviceDepthClipEnableFeaturesEXT:\n"); - TRACE(" depthClipEnable: %#x.\n", depth_clip_features->depthClipEnable); - -+ fragment_shader_interlock_features = &info->fragment_shader_interlock_features; -+ TRACE(" VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT:\n"); -+ TRACE(" fragmentShaderSampleInterlock: %#x.\n.", -+ fragment_shader_interlock_features->fragmentShaderSampleInterlock); -+ TRACE(" fragmentShaderPixelInterlock: %#x\n.", -+ fragment_shader_interlock_features->fragmentShaderPixelInterlock); -+ TRACE(" 
fragmentShaderShadingRateInterlock: %#x\n.", -+ fragment_shader_interlock_features->fragmentShaderShadingRateInterlock); -+ - demote_features = &info->demote_features; - TRACE(" VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT:\n"); - TRACE(" shaderDemoteToHelperInvocation: %#x.\n", demote_features->shaderDemoteToHelperInvocation); -@@ -1476,6 +1492,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - uint32_t *device_extension_count, bool **user_extension_supported) - { - const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; -+ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock; - const struct vkd3d_optional_device_extensions_info *optional_extensions; - VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; - VkPhysicalDevice physical_device = device->vk_physical_device; -@@ -1539,8 +1556,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - - device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageReadWithoutFormat - && d3d12_device_supports_typed_uav_load_additional_formats(device); -- /* GL_INTEL_fragment_shader_ordering, no Vulkan equivalent. */ -- device->feature_options.ROVsSupported = FALSE; - /* GL_INTEL_conservative_rasterization, no Vulkan equivalent. 
*/ - device->feature_options.ConservativeRasterizationTier = D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED; - device->feature_options.MaxGPUVirtualAddressBitsPerResource = 40; /* FIXME */ -@@ -1619,6 +1634,12 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - *user_extension_supported, vulkan_info, "device", - device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); - -+ fragment_shader_interlock = &physical_device_info->fragment_shader_interlock_features; -+ if (!fragment_shader_interlock->fragmentShaderSampleInterlock -+ || !fragment_shader_interlock->fragmentShaderPixelInterlock) -+ vulkan_info->EXT_fragment_shader_interlock = false; -+ device->feature_options.ROVsSupported = vulkan_info->EXT_fragment_shader_interlock; -+ - if (!physical_device_info->conditional_rendering_features.conditionalRendering) - vulkan_info->EXT_conditional_rendering = false; - if (!physical_device_info->depth_clip_features.depthClipEnable) -@@ -1675,6 +1696,10 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] - = VKD3D_SHADER_SPIRV_EXTENSION_EXT_DESCRIPTOR_INDEXING; - -+ if (vulkan_info->EXT_fragment_shader_interlock) -+ vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] -+ = VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK; -+ - if (vulkan_info->EXT_shader_stencil_export) - vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] - = VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT; -@@ -2499,17 +2524,18 @@ static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cach - } - - /* ID3D12Device */ --static inline struct d3d12_device *impl_from_ID3D12Device7(ID3D12Device7 *iface) -+static inline struct d3d12_device *impl_from_ID3D12Device8(ID3D12Device8 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device7_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_device, 
ID3D12Device8_iface); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device8 *iface, - REFIID riid, void **object) - { - TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); - -- if (IsEqualGUID(riid, &IID_ID3D12Device7) -+ if (IsEqualGUID(riid, &IID_ID3D12Device8) -+ || IsEqualGUID(riid, &IID_ID3D12Device7) - || IsEqualGUID(riid, &IID_ID3D12Device6) - || IsEqualGUID(riid, &IID_ID3D12Device5) - || IsEqualGUID(riid, &IID_ID3D12Device4) -@@ -2531,9 +2557,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *ifac - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device7 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device8 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - unsigned int refcount = vkd3d_atomic_increment_u32(&device->refcount); - - TRACE("%p increasing refcount to %u.\n", device, refcount); -@@ -2563,9 +2589,9 @@ static HRESULT device_worker_stop(struct d3d12_device *device) - return S_OK; - } - --static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device8 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - unsigned int refcount = vkd3d_atomic_decrement_u32(&device->refcount); - - TRACE("%p decreasing refcount to %u.\n", device, refcount); -@@ -2602,10 +2628,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) - return refcount; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device8 *iface, - REFGUID guid, UINT *data_size, void 
*data) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -2613,10 +2639,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *ifac - return vkd3d_get_private_data(&device->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device8 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -2624,19 +2650,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *ifac - return vkd3d_set_private_data(&device->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device8 *iface, - REFGUID guid, const IUnknown *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&device->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device8 *iface, const WCHAR *name) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, 
device->wchar_size)); - -@@ -2644,17 +2670,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, cons - VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); - } - --static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device7 *iface) -+static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device8 *iface) - { - TRACE("iface %p.\n", iface); - - return 1; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device8 *iface, - const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_command_queue *object; - HRESULT hr; - -@@ -2668,10 +2694,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 * - riid, command_queue); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device8 *iface, - D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_command_allocator *object; - HRESULT hr; - -@@ -2685,10 +2711,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic - riid, command_allocator); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device8 *iface, - const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct 
d3d12_pipeline_state *object; - HRESULT hr; - -@@ -2702,10 +2728,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 - &IID_ID3D12PipelineState, riid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device8 *iface, - const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_pipeline_state *object; - HRESULT hr; - -@@ -2719,11 +2745,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D - &IID_ID3D12PipelineState, riid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device8 *iface, - UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, - ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_command_list *object; - HRESULT hr; - -@@ -2846,10 +2872,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) - return true; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device8 *iface, - D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", - iface, feature, feature_data, 
feature_data_size); -@@ -3521,10 +3547,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device8 *iface, - const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_descriptor_heap *object; - HRESULT hr; - -@@ -3538,7 +3564,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device7 - &IID_ID3D12DescriptorHeap, riid, descriptor_heap); - } - --static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device7 *iface, -+static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device8 *iface, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) - { - TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); -@@ -3561,11 +3587,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device8 *iface, - UINT node_mask, const void *bytecode, SIZE_T bytecode_length, - REFIID riid, void **root_signature) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_root_signature *object; - HRESULT hr; - -@@ -3581,10 +3607,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 - &IID_ID3D12RootSignature, riid, root_signature); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE 
d3d12_device_CreateConstantBufferView(ID3D12Device8 *iface, - const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); -@@ -3593,11 +3619,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device8 *iface, - ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, resource %p, desc %p, descriptor %s.\n", -@@ -3607,11 +3633,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device8 *iface, - ID3D12Resource *resource, ID3D12Resource *counter_resource, - const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %s.\n", -@@ -3622,7 +3648,7 @@ static void STDMETHODCALLTYPE 
d3d12_device_CreateUnorderedAccessView(ID3D12Devic - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device8 *iface, - ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -@@ -3630,10 +3656,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 - iface, resource, desc, debug_cpu_handle(descriptor)); - - d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), -- impl_from_ID3D12Device7(iface), unsafe_impl_from_ID3D12Resource(resource), desc); -+ impl_from_ID3D12Device8(iface), unsafe_impl_from_ID3D12Resource(resource), desc); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device8 *iface, - ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -@@ -3641,13 +3667,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 - iface, resource, desc, debug_cpu_handle(descriptor)); - - d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), -- impl_from_ID3D12Device7(iface), unsafe_impl_from_ID3D12Resource(resource), desc); -+ impl_from_ID3D12Device8(iface), unsafe_impl_from_ID3D12Resource(resource), desc); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device8 *iface, - const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, desc %p, descriptor 
%s.\n", iface, desc, debug_cpu_handle(descriptor)); -@@ -3656,14 +3682,14 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device8 *iface, - UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, - const UINT *dst_descriptor_range_sizes, - UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, - const UINT *src_descriptor_range_sizes, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; - unsigned int dst_range_size, src_range_size; - struct d3d12_descriptor_heap *dst_heap; -@@ -3719,7 +3745,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device7 *iface, - } - } - --static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device8 *iface, - UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, - const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) -@@ -3850,10 +3876,10 @@ static void d3d12_device_get_resource_allocation_info(struct d3d12_device *devic - } - - static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( -- ID3D12Device7 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, -+ ID3D12Device8 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, - UINT count, const D3D12_RESOURCE_DESC *resource_descs) - { -- struct d3d12_device *device = 
impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p.\n", - iface, info, visible_mask, count, resource_descs); -@@ -3865,10 +3891,10 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour - return info; - } - --static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device7 *iface, -+static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device8 *iface, - D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - bool coherent; - - TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", -@@ -3908,12 +3934,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope - return heap_properties; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device8 *iface, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_resource *object; - HRESULT hr; -@@ -3935,10 +3961,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateHeap(ID3D12Device8 *iface, - const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_heap *object; - HRESULT hr; - -@@ -3954,12 +3980,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, - return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device8 *iface, - ID3D12Heap *heap, UINT64 heap_offset, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_heap *heap_object; - struct d3d12_resource *object; -@@ -3980,11 +4006,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device7 - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device8 *iface, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_resource *object; - HRESULT hr; -@@ -4001,11 +4027,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic - return 
return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device8 *iface, - ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, - const WCHAR *name, HANDLE *handle) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", - iface, object, attributes, (uint32_t)access, debugstr_w(name, device->wchar_size), handle); -@@ -4013,7 +4039,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 * - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device8 *iface, - HANDLE handle, REFIID riid, void **object) - { - FIXME("iface %p, handle %p, riid %s, object %p stub!\n", -@@ -4022,10 +4048,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device7 *if - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device8 *iface, - const WCHAR *name, DWORD access, HANDLE *handle) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - FIXME("iface %p, name %s, access %#x, handle %p stub!\n", - iface, debugstr_w(name, device->wchar_size), (uint32_t)access, handle); -@@ -4033,7 +4059,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, -+static HRESULT 
STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device8 *iface, - UINT object_count, ID3D12Pageable * const *objects) - { - ID3D12Fence *fence; -@@ -4041,17 +4067,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, - - TRACE("iface %p, object_count %u, objects %p.\n", iface, object_count, objects); - -- if (FAILED(hr = ID3D12Device7_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) -+ if (FAILED(hr = ID3D12Device8_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) - return hr; - -- hr = ID3D12Device7_EnqueueMakeResident(iface, 0, object_count, objects, fence, 1); -+ hr = ID3D12Device8_EnqueueMakeResident(iface, 0, object_count, objects, fence, 1); - if (SUCCEEDED(hr)) - ID3D12Fence_SetEventOnCompletion(fence, 1, NULL); - ID3D12Fence_Release(fence); - return hr; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device8 *iface, - UINT object_count, ID3D12Pageable * const *objects) - { - FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", -@@ -4060,10 +4086,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 *iface, - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device8 *iface, - UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_fence *object; - HRESULT hr; - -@@ -4076,9 +4102,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, - return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device7 *iface) -+static HRESULT STDMETHODCALLTYPE 
d3d12_device_GetDeviceRemovedReason(ID3D12Device8 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - TRACE("iface %p.\n", iface); - -@@ -4163,12 +4189,12 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, - *total_bytes = total; - } - --static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device8 *iface, - const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, - UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, - UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - D3D12_RESOURCE_DESC1 resource_desc; - - TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " -@@ -4182,10 +4208,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 * - base_offset, layouts, row_counts, row_sizes, total_bytes); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device8 *iface, - const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_query_heap *object; - HRESULT hr; - -@@ -4198,18 +4224,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *ifa - return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device7 *iface, BOOL enable) -+static HRESULT STDMETHODCALLTYPE 
d3d12_device_SetStablePowerState(ID3D12Device8 *iface, BOOL enable) - { - FIXME("iface %p, enable %#x stub!\n", iface, enable); - - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device8 *iface, - const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, - REFIID iid, void **command_signature) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_command_signature *object; - HRESULT hr; - -@@ -4223,14 +4249,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic - &IID_ID3D12CommandSignature, iid, command_signature); - } - --static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device8 *iface, - ID3D12Resource *resource, UINT *total_tile_count, - D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, - UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, - D3D12_SUBRESOURCE_TILING *sub_resource_tilings) - { - const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " - "standard_title_shape %p, sub_resource_tiling_count %p, " -@@ -4243,9 +4269,9 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 *ifac - sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); - } - --static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface, LUID *luid) -+static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device8 *iface, LUID *luid) - { -- struct 
d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - TRACE("iface %p, luid %p.\n", iface, luid); - -@@ -4254,7 +4280,7 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface - return luid; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device8 *iface, - const void *blob, SIZE_T blob_size, REFIID iid, void **lib) - { - FIXME("iface %p, blob %p, blob_size %"PRIuPTR", iid %s, lib %p stub!\n", -@@ -4263,7 +4289,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device - return DXGI_ERROR_UNSUPPORTED; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device8 *iface, - ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, - D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) - { -@@ -4273,7 +4299,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion( - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device8 *iface, - UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) - { - FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); -@@ -4281,10 +4307,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device8 *iface, - const D3D12_PIPELINE_STATE_STREAM_DESC *desc, REFIID iid, 
void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_pipeline_state *object; - HRESULT hr; - -@@ -4296,7 +4322,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 - return return_interface(&object->ID3D12PipelineState_iface, &IID_ID3D12PipelineState, iid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12Device8 *iface, - const void *address, REFIID iid, void **heap) - { - FIXME("iface %p, address %p, iid %s, heap %p stub!\n", iface, address, debugstr_guid(iid), heap); -@@ -4304,7 +4330,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12 - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device8 *iface, - HANDLE file_mapping, REFIID iid, void **heap) - { - FIXME("iface %p, file_mapping %p, iid %s, heap %p stub!\n", iface, file_mapping, debugstr_guid(iid), heap); -@@ -4312,7 +4338,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device8 *iface, - D3D12_RESIDENCY_FLAGS flags, UINT num_objects, ID3D12Pageable *const *objects, - ID3D12Fence *fence, UINT64 fence_value) - { -@@ -4323,7 +4349,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateCommandList1(ID3D12Device8 *iface, - UINT node_mask, D3D12_COMMAND_LIST_TYPE type, D3D12_COMMAND_LIST_FLAGS flags, - REFIID iid, void **command_list) - { -@@ -4333,7 +4359,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 * - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device8 *iface, - const D3D12_PROTECTED_RESOURCE_SESSION_DESC *desc, REFIID iid, void **session) - { - FIXME("iface %p, desc %p, iid %s, session %p stub!\n", iface, desc, debugstr_guid(iid), session); -@@ -4341,13 +4367,13 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3 - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device8 *iface, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, - ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_resource *object; - HRESULT hr; -@@ -4369,11 +4395,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Dev - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device8 *iface, - const D3D12_HEAP_DESC *desc, ID3D12ProtectedResourceSession *protected_session, - REFIID iid, void **heap) - { -- 
struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - struct d3d12_heap *object; - HRESULT hr; - -@@ -4389,7 +4415,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, - return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device8 *iface, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, - ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) -@@ -4403,11 +4429,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Devi - } - - static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo1( -- ID3D12Device7 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, -+ ID3D12Device8 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, - UINT count, const D3D12_RESOURCE_DESC *resource_descs, - D3D12_RESOURCE_ALLOCATION_INFO1 *info1) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); - - TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", - iface, info, visible_mask, count, resource_descs, info1); -@@ -4419,7 +4445,7 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour - return info; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device8 *iface, - ID3D12LifetimeOwner *owner, REFIID iid, void **tracker) - { - FIXME("iface %p, owner %p, iid %s, tracker %p stub!\n", iface, owner, debugstr_guid(iid), tracker); -@@ 
-4427,12 +4453,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device - return E_NOTIMPL; - } - --static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device7 *iface) -+static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device8 *iface) - { - FIXME("iface %p stub!\n", iface); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device8 *iface, - UINT *num_meta_commands, D3D12_META_COMMAND_DESC *command_desc) - { - FIXME("iface %p, num_meta_commands %p, command_desc %p stub!\n", iface, -@@ -4441,7 +4467,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device8 *iface, - REFGUID command_id, D3D12_META_COMMAND_PARAMETER_STAGE stage, - UINT *size_in_bytes, UINT *parameter_count, - D3D12_META_COMMAND_PARAMETER_DESC *parameter_desc) -@@ -4453,7 +4479,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3 - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device8 *iface, - REFGUID command_id, UINT node_mask, const void *parameters_data, - SIZE_T data_size_in_bytes, REFIID iid, void **meta_command) - { -@@ -4465,7 +4491,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *i - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device8 *iface, - const D3D12_STATE_OBJECT_DESC *desc, REFIID iid, void **state_object) - { - FIXME("iface %p, 
desc %p, iid %s, state_object %p stub!\n", iface, desc, debugstr_guid(iid), state_object); -@@ -4473,14 +4499,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device7 *i - return E_NOTIMPL; - } - --static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device8 *iface, - const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc, - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO *info) - { - FIXME("iface %p, desc %p, info %p stub!\n", iface, desc, info); - } - --static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device7 *iface, -+static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device8 *iface, - D3D12_SERIALIZED_DATA_TYPE data_type, const D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER *identifier) - { - FIXME("iface %p, data_type %u, identifier %p stub!\n", iface, data_type, identifier); -@@ -4488,7 +4514,7 @@ static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_Ch - return D3D12_DRIVER_MATCHING_IDENTIFIER_UNRECOGNIZED; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device8 *iface, - D3D12_BACKGROUND_PROCESSING_MODE mode, D3D12_MEASUREMENTS_ACTION action, HANDLE event, - BOOL *further_measurements_desired) - { -@@ -4498,7 +4524,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12 - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device8 *iface, - const D3D12_STATE_OBJECT_DESC *addition, ID3D12StateObject 
*state_object_to_grow_from, - REFIID riid, void **new_state_object) - { -@@ -4508,7 +4534,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *if - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device8 *iface, - const D3D12_PROTECTED_RESOURCE_SESSION_DESC1 *desc, REFIID riid, void **session) - { - FIXME("iface %p, desc %p, riid %s, session %p stub!\n", iface, desc, debugstr_guid(riid), session); -@@ -4516,7 +4542,94 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID - return E_NOTIMPL; - } - --static const struct ID3D12Device7Vtbl d3d12_device_vtbl = -+static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo2(ID3D12Device8 *iface, -+ D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, -+ const D3D12_RESOURCE_DESC1 *resource_descs, D3D12_RESOURCE_ALLOCATION_INFO1 *info1) -+{ -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ -+ TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", -+ iface, info, visible_mask, count, resource_descs, info1); -+ -+ debug_ignored_node_mask(visible_mask); -+ -+ d3d12_device_get_resource1_allocation_info(device, info1, count, resource_descs, info); -+ -+ return info; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2(ID3D12Device8 *iface, -+ const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC1 *desc, -+ D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, -+ ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) -+{ -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_resource *object; -+ HRESULT hr; -+ -+ TRACE("iface %p, heap_properties %p, 
heap_flags %#x, desc %p, initial_state %#x, " -+ "optimized_clear_value %p, protected_session %p, iid %s, resource %p.\n", -+ iface, heap_properties, heap_flags, desc, initial_state, -+ optimized_clear_value, protected_session, debugstr_guid(iid), resource); -+ -+ if (FAILED(hr = d3d12_committed_resource_create(device, heap_properties, heap_flags, -+ desc, initial_state, optimized_clear_value, protected_session, &object))) -+ { -+ *resource = NULL; -+ return hr; -+ } -+ -+ return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource1(ID3D12Device8 *iface, -+ ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC1 *resource_desc, -+ D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, -+ REFIID iid, void **resource) -+{ -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_heap *heap_object; -+ struct d3d12_resource *object; -+ HRESULT hr; -+ -+ TRACE("iface %p, heap %p, heap_offset %#"PRIx64", desc %p, initial_state %#x, " -+ "optimized_clear_value %p, iid %s, resource %p.\n", -+ iface, heap, heap_offset, resource_desc, initial_state, -+ optimized_clear_value, debugstr_guid(iid), resource); -+ -+ heap_object = unsafe_impl_from_ID3D12Heap(heap); -+ -+ if (FAILED(hr = d3d12_placed_resource_create(device, heap_object, heap_offset, -+ resource_desc, initial_state, optimized_clear_value, &object))) -+ return hr; -+ -+ return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); -+} -+ -+static void STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView(ID3D12Device8 *iface, -+ ID3D12Resource *target_resource, ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) -+{ -+ FIXME("iface %p, target_resource %p, feedback_resource %p, descriptor %s stub!\n", -+ iface, target_resource, feedback_resource, debug_cpu_handle(descriptor)); 
-+} -+ -+static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints1(ID3D12Device8 *iface, -+ const D3D12_RESOURCE_DESC1 *desc, UINT first_sub_resource, UINT sub_resource_count, -+ UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, -+ UINT64 *row_sizes, UINT64 *total_bytes) -+{ -+ struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ -+ TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " -+ "layouts %p, row_counts %p, row_sizes %p, total_bytes %p.\n", -+ iface, desc, first_sub_resource, sub_resource_count, base_offset, -+ layouts, row_counts, row_sizes, total_bytes); -+ -+ d3d12_device_get_copyable_footprints(device, desc, first_sub_resource, sub_resource_count, -+ base_offset, layouts, row_counts, row_sizes, total_bytes); -+} -+ -+static const struct ID3D12Device8Vtbl d3d12_device_vtbl = - { - /* IUnknown methods */ - d3d12_device_QueryInterface, -@@ -4596,14 +4709,20 @@ static const struct ID3D12Device7Vtbl d3d12_device_vtbl = - /* ID3D12Device7 methods */ - d3d12_device_AddToStateObject, - d3d12_device_CreateProtectedResourceSession1, -+ /* ID3D12Device8 methods */ -+ d3d12_device_GetResourceAllocationInfo2, -+ d3d12_device_CreateCommittedResource2, -+ d3d12_device_CreatePlacedResource1, -+ d3d12_device_CreateSamplerFeedbackUnorderedAccessView, -+ d3d12_device_GetCopyableFootprints1, - }; - --struct d3d12_device *unsafe_impl_from_ID3D12Device7(ID3D12Device7 *iface) -+struct d3d12_device *unsafe_impl_from_ID3D12Device8(ID3D12Device8 *iface) - { - if (!iface) - return NULL; - assert(iface->lpVtbl == &d3d12_device_vtbl); -- return impl_from_ID3D12Device7(iface); -+ return impl_from_ID3D12Device8(iface); - } - - static void *device_worker_main(void *arg) -@@ -4646,7 +4765,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, - const struct vkd3d_vk_device_procs *vk_procs; - HRESULT hr; - -- device->ID3D12Device7_iface.lpVtbl = &d3d12_device_vtbl; -+ 
device->ID3D12Device8_iface.lpVtbl = &d3d12_device_vtbl; - device->refcount = 1; - - vkd3d_instance_incref(device->vkd3d_instance = instance); -@@ -4894,28 +5013,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha - - IUnknown *vkd3d_get_device_parent(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device8((ID3D12Device8 *)device); - - return d3d12_device->parent; - } - - VkDevice vkd3d_get_vk_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device8((ID3D12Device8 *)device); - - return d3d12_device->vk_device; - } - - VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device8((ID3D12Device8 *)device); - - return d3d12_device->vk_physical_device; - } - - struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device8((ID3D12Device8 *)device); - - return d3d12_device->vkd3d_instance; - } -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 89764d0901d..446ef3ab0db 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -1857,6 +1857,7 @@ static bool d3d12_resource_validate_texture_alignment(const D3D12_RESOURCE_DESC1 - - HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device) - { -+ const D3D12_MIP_REGION *mip_region = &desc->SamplerFeedbackMipRegion; - const struct vkd3d_format *format; - - switch (desc->Dimension) -@@ -1926,6 +1927,12 @@ HRESULT 
d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 - - d3d12_validate_resource_flags(desc->Flags); - -+ if (mip_region->Width && mip_region->Height && mip_region->Depth) -+ { -+ FIXME("Unhandled sampler feedback mip region size (%u, %u, %u).\n", mip_region->Width, mip_region->Height, -+ mip_region->Depth); -+ } -+ - return S_OK; - } - -@@ -2253,7 +2260,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, - HRESULT vkd3d_create_image_resource(ID3D12Device *device, - const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) - { -- struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device7((ID3D12Device7 *)device); -+ struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device8((ID3D12Device8 *)device); - struct d3d12_resource *object; - HRESULT hr; - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -index 7919b7d8760..f6925d47bdf 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -@@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, - - if (!device) - { -- ID3D12Device_Release(&object->ID3D12Device7_iface); -+ ID3D12Device_Release(&object->ID3D12Device8_iface); - return S_FALSE; - } - -- return return_interface(&object->ID3D12Device7_iface, &IID_ID3D12Device, iid, device); -+ return return_interface(&object->ID3D12Device8_iface, &IID_ID3D12Device, iid, device); - } - - /* ID3D12RootSignatureDeserializer */ -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index b092bb26ded..39d892a6fa7 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -55,7 +55,7 @@ - - #define VKD3D_MAX_COMPATIBLE_FORMAT_COUNT 6u - #define VKD3D_MAX_QUEUE_FAMILY_COUNT 3u --#define VKD3D_MAX_SHADER_EXTENSIONS 4u -+#define VKD3D_MAX_SHADER_EXTENSIONS 5u - #define VKD3D_MAX_SHADER_STAGES 5u - #define 
VKD3D_MAX_VK_SYNC_OBJECTS 4u - #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u -@@ -133,6 +133,7 @@ struct vkd3d_vulkan_info - bool EXT_debug_marker; - bool EXT_depth_clip_enable; - bool EXT_descriptor_indexing; -+ bool EXT_fragment_shader_interlock; - bool EXT_mutable_descriptor_type; - bool EXT_robustness2; - bool EXT_shader_demote_to_helper_invocation; -@@ -202,36 +203,11 @@ union vkd3d_thread_handle - void *handle; - }; - --struct vkd3d_mutex --{ -- CRITICAL_SECTION lock; --}; -- - struct vkd3d_cond - { - CONDITION_VARIABLE cond; - }; - --static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) --{ -- InitializeCriticalSection(&lock->lock); --} -- --static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) --{ -- EnterCriticalSection(&lock->lock); --} -- --static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) --{ -- LeaveCriticalSection(&lock->lock); --} -- --static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) --{ -- DeleteCriticalSection(&lock->lock); --} -- - static inline void vkd3d_cond_init(struct vkd3d_cond *cond) - { - InitializeConditionVariable(&cond->cond); -@@ -287,53 +263,11 @@ union vkd3d_thread_handle - void *handle; - }; - --struct vkd3d_mutex --{ -- pthread_mutex_t lock; --}; -- - struct vkd3d_cond - { - pthread_cond_t cond; - }; - -- --static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) --{ -- int ret; -- -- ret = pthread_mutex_init(&lock->lock, NULL); -- if (ret) -- ERR("Could not initialize the mutex, error %d.\n", ret); --} -- --static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) --{ -- int ret; -- -- ret = pthread_mutex_lock(&lock->lock); -- if (ret) -- ERR("Could not lock the mutex, error %d.\n", ret); --} -- --static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) --{ -- int ret; -- -- ret = pthread_mutex_unlock(&lock->lock); -- if (ret) -- ERR("Could not unlock the mutex, error %d.\n", ret); --} -- --static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) --{ -- int ret; 
-- -- ret = pthread_mutex_destroy(&lock->lock); -- if (ret) -- ERR("Could not destroy the mutex, error %d.\n", ret); --} -- - static inline void vkd3d_cond_init(struct vkd3d_cond *cond) - { - int ret; -@@ -1735,7 +1669,7 @@ struct vkd3d_desc_object_cache - /* ID3D12Device */ - struct d3d12_device - { -- ID3D12Device7 ID3D12Device7_iface; -+ ID3D12Device8 ID3D12Device8_iface; - unsigned int refcount; - - VkDevice vk_device; -@@ -1810,29 +1744,29 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 - bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); - void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, - const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); --struct d3d12_device *unsafe_impl_from_ID3D12Device7(ID3D12Device7 *iface); -+struct d3d12_device *unsafe_impl_from_ID3D12Device8(ID3D12Device8 *iface); - HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); - void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); - - static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) - { -- return ID3D12Device7_QueryInterface(&device->ID3D12Device7_iface, iid, object); -+ return ID3D12Device8_QueryInterface(&device->ID3D12Device8_iface, iid, object); - } - - static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) - { -- return ID3D12Device7_AddRef(&device->ID3D12Device7_iface); -+ return ID3D12Device8_AddRef(&device->ID3D12Device8_iface); - } - - static inline ULONG d3d12_device_release(struct d3d12_device *device) - { -- return ID3D12Device7_Release(&device->ID3D12Device7_iface); -+ return ID3D12Device8_Release(&device->ID3D12Device8_iface); - } - - static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) - { -- return 
ID3D12Device7_GetDescriptorHandleIncrementSize(&device->ID3D12Device7_iface, descriptor_type); -+ return ID3D12Device8_GetDescriptorHandleIncrementSize(&device->ID3D12Device8_iface, descriptor_type); - } - - /* utils */ --- -2.43.0 - diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch new file mode 100644 index 00000000..9f7cc4b9 --- /dev/null +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch @@ -0,0 +1,17272 @@ +From f25b43e2392873bd723d112513abf5987bb32313 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Thu, 7 Mar 2024 10:40:41 +1100 +Subject: [PATCH] Updated vkd3d to 4a209efb6278586d412ceb0a7cbe21e6769a7367. + +--- + libs/vkd3d/Makefile.in | 1 + + libs/vkd3d/include/private/vkd3d_common.h | 189 +- + libs/vkd3d/include/private/vkd3d_memory.h | 11 +- + libs/vkd3d/include/vkd3d_shader.h | 69 +- + libs/vkd3d/include/vkd3d_types.h | 2 + + libs/vkd3d/libs/vkd3d-common/blob.c | 3 +- + libs/vkd3d/libs/vkd3d-common/debug.c | 2 +- + libs/vkd3d/libs/vkd3d-common/error.c | 1 - + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 1151 +++--- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 165 +- + libs/vkd3d/libs/vkd3d-shader/dxbc.c | 39 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 1610 ++++++++- + libs/vkd3d/libs/vkd3d-shader/fx.c | 517 ++- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 108 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 495 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 120 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 7 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 730 +++- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 535 ++- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 23 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 3172 +++++++++++++---- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 383 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 229 +- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 280 +- + 
.../libs/vkd3d-shader/vkd3d_shader_private.h | 118 +- + libs/vkd3d/libs/vkd3d/cache.c | 59 + + libs/vkd3d/libs/vkd3d/command.c | 20 + + libs/vkd3d/libs/vkd3d/device.c | 732 +++- + libs/vkd3d/libs/vkd3d/resource.c | 21 +- + libs/vkd3d/libs/vkd3d/state.c | 34 +- + libs/vkd3d/libs/vkd3d/utils.c | 10 + + libs/vkd3d/libs/vkd3d/vkd3d_main.c | 22 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 92 +- + 33 files changed, 8450 insertions(+), 2500 deletions(-) + create mode 100644 libs/vkd3d/libs/vkd3d/cache.c + +diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in +index 448e9a0e61d..94e4833dc9a 100644 +--- a/libs/vkd3d/Makefile.in ++++ b/libs/vkd3d/Makefile.in +@@ -30,6 +30,7 @@ SOURCES = \ + libs/vkd3d-shader/spirv.c \ + libs/vkd3d-shader/tpf.c \ + libs/vkd3d-shader/vkd3d_shader_main.c \ ++ libs/vkd3d/cache.c \ + libs/vkd3d/command.c \ + libs/vkd3d/device.c \ + libs/vkd3d/resource.c \ +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index 6a3b530d868..f9df47d339c 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -30,6 +30,9 @@ + #include + #include + #include ++#ifndef _WIN32 ++#include ++#endif + + #ifdef _MSC_VER + #include +@@ -72,6 +75,8 @@ + #define TAG_XNAP VKD3D_MAKE_TAG('X', 'N', 'A', 'P') + #define TAG_XNAS VKD3D_MAKE_TAG('X', 'N', 'A', 'S') + ++#define TAG_RD11_REVERSE 0x25441313 ++ + static inline uint64_t align(uint64_t addr, size_t alignment) + { + return (addr + (alignment - 1)) & ~(alignment - 1); +@@ -105,11 +110,130 @@ VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsig + #define vkd3d_unreachable() vkd3d_unreachable_(__FILE__, __LINE__) + #endif + ++#ifdef VKD3D_NO_TRACE_MESSAGES ++#define TRACE(args...) do { } while (0) ++#define TRACE_ON() (false) ++#endif ++ ++#ifdef VKD3D_NO_DEBUG_MESSAGES ++#define WARN(args...) do { } while (0) ++#define FIXME(args...) 
do { } while (0) ++#endif ++ ++enum vkd3d_dbg_level ++{ ++ VKD3D_DBG_LEVEL_NONE, ++ VKD3D_DBG_LEVEL_ERR, ++ VKD3D_DBG_LEVEL_FIXME, ++ VKD3D_DBG_LEVEL_WARN, ++ VKD3D_DBG_LEVEL_TRACE, ++}; ++ ++enum vkd3d_dbg_level vkd3d_dbg_get_level(void); ++ ++void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4); ++void vkd3d_dbg_set_log_callback(PFN_vkd3d_log callback); ++ ++const char *vkd3d_dbg_sprintf(const char *fmt, ...) VKD3D_PRINTF_FUNC(1, 2); ++const char *vkd3d_dbg_vsprintf(const char *fmt, va_list args); ++const char *debugstr_a(const char *str); ++const char *debugstr_an(const char *str, size_t n); ++const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); ++ ++#define VKD3D_DBG_LOG(level) \ ++ do { \ ++ const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ ++ VKD3D_DBG_PRINTF ++ ++#define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ ++ do { \ ++ static bool vkd3d_dbg_next_time; \ ++ const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ ++ ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ ++ vkd3d_dbg_next_time = true; \ ++ VKD3D_DBG_PRINTF ++ ++#define VKD3D_DBG_PRINTF(...) 
\ ++ vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) ++ ++#ifndef TRACE ++#define TRACE VKD3D_DBG_LOG(TRACE) ++#endif ++ ++#ifndef WARN ++#define WARN VKD3D_DBG_LOG(WARN) ++#endif ++ ++#ifndef FIXME ++#define FIXME VKD3D_DBG_LOG(FIXME) ++#endif ++ ++#define ERR VKD3D_DBG_LOG(ERR) ++ ++#ifndef TRACE_ON ++#define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) ++#endif ++ ++#ifndef WARN_ON ++#define WARN_ON() (vkd3d_dbg_get_level() >= VKD3D_DBG_LEVEL_WARN) ++#endif ++ ++#define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) ++ ++#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name ++ ++static inline const char *debugstr_guid(const GUID *guid) ++{ ++ if (!guid) ++ return "(null)"; ++ ++ return vkd3d_dbg_sprintf("{%08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x}", ++ (unsigned long)guid->Data1, guid->Data2, guid->Data3, guid->Data4[0], ++ guid->Data4[1], guid->Data4[2], guid->Data4[3], guid->Data4[4], ++ guid->Data4[5], guid->Data4[6], guid->Data4[7]); ++} ++ ++static inline const char *debugstr_hresult(HRESULT hr) ++{ ++ switch (hr) ++ { ++#define TO_STR(u) case u: return #u; ++ TO_STR(S_OK) ++ TO_STR(S_FALSE) ++ TO_STR(E_NOTIMPL) ++ TO_STR(E_NOINTERFACE) ++ TO_STR(E_POINTER) ++ TO_STR(E_ABORT) ++ TO_STR(E_FAIL) ++ TO_STR(E_OUTOFMEMORY) ++ TO_STR(E_INVALIDARG) ++ TO_STR(DXGI_ERROR_NOT_FOUND) ++ TO_STR(DXGI_ERROR_MORE_DATA) ++ TO_STR(DXGI_ERROR_UNSUPPORTED) ++#undef TO_STR ++ default: ++ return vkd3d_dbg_sprintf("%#x", (int)hr); ++ } ++} ++ ++unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value); ++ ++struct vkd3d_debug_option ++{ ++ const char *name; ++ uint64_t flag; ++}; ++ ++bool vkd3d_debug_list_has_member(const char *string, const char *member); ++uint64_t vkd3d_parse_debug_options(const char *string, ++ const struct vkd3d_debug_option *options, unsigned int option_count); ++void vkd3d_set_thread_name(const char *name); ++ + static inline unsigned int vkd3d_popcount(unsigned 
int v) + { + #ifdef _MSC_VER + return __popcnt(v); +-#elif defined(__MINGW32__) ++#elif defined(HAVE_BUILTIN_POPCOUNT) + return __builtin_popcount(v); + #else + v -= (v >> 1) & 0x55555555; +@@ -305,6 +429,69 @@ static inline uint32_t vkd3d_atomic_increment_u32(uint32_t volatile *x) + return vkd3d_atomic_add_fetch_u32(x, 1); + } + ++struct vkd3d_mutex ++{ ++#ifdef _WIN32 ++ CRITICAL_SECTION lock; ++#else ++ pthread_mutex_t lock; ++#endif ++}; ++ ++#ifdef _WIN32 ++#define VKD3D_MUTEX_INITIALIZER {{NULL, -1, 0, 0, 0, 0}} ++#else ++#define VKD3D_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER ++#endif ++ ++static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) ++{ ++#ifdef _WIN32 ++ InitializeCriticalSection(&lock->lock); ++#else ++ int ret; ++ ++ if ((ret = pthread_mutex_init(&lock->lock, NULL))) ++ ERR("Failed to initialise the mutex, ret %d.\n", ret); ++#endif ++} ++ ++static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) ++{ ++#ifdef _WIN32 ++ EnterCriticalSection(&lock->lock); ++#else ++ int ret; ++ ++ if ((ret = pthread_mutex_lock(&lock->lock))) ++ ERR("Failed to lock the mutex, ret %d.\n", ret); ++#endif ++} ++ ++static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) ++{ ++#ifdef _WIN32 ++ LeaveCriticalSection(&lock->lock); ++#else ++ int ret; ++ ++ if ((ret = pthread_mutex_unlock(&lock->lock))) ++ ERR("Failed to unlock the mutex, ret %d.\n", ret); ++#endif ++} ++ ++static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) ++{ ++#ifdef _WIN32 ++ DeleteCriticalSection(&lock->lock); ++#else ++ int ret; ++ ++ if ((ret = pthread_mutex_destroy(&lock->lock))) ++ ERR("Failed to destroy the mutex, ret %d.\n", ret); ++#endif ++} ++ + static inline void vkd3d_parse_version(const char *version, int *major, int *minor) + { + *major = atoi(version); +diff --git a/libs/vkd3d/include/private/vkd3d_memory.h b/libs/vkd3d/include/private/vkd3d_memory.h +index 8a2edb1000d..682d35c03c6 100644 +--- a/libs/vkd3d/include/private/vkd3d_memory.h ++++ 
b/libs/vkd3d/include/private/vkd3d_memory.h +@@ -24,7 +24,7 @@ + #include + #include + +-#include "vkd3d_debug.h" ++#include "vkd3d_common.h" + + static inline void *vkd3d_malloc(size_t size) + { +@@ -65,6 +65,15 @@ static inline char *vkd3d_strdup(const char *string) + return ptr; + } + ++static inline void *vkd3d_memdup(const void *mem, size_t size) ++{ ++ void *ptr; ++ ++ if ((ptr = vkd3d_malloc(size))) ++ memcpy(ptr, mem, size); ++ return ptr; ++} ++ + bool vkd3d_array_reserve(void **elements, size_t *capacity, size_t element_count, size_t element_size); + + #endif /* __VKD3D_MEMORY_H */ +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index 9e663919c38..2b32b8a3e98 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -21,6 +21,7 @@ + + #include + #include ++#include + #include + + #ifdef __cplusplus +@@ -148,6 +149,12 @@ enum vkd3d_shader_compile_option_formatting_flags + VKD3D_SHADER_COMPILE_OPTION_FORMATTING_OFFSETS = 0x00000004, + VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER = 0x00000008, + VKD3D_SHADER_COMPILE_OPTION_FORMATTING_RAW_IDS = 0x00000010, ++ /** ++ * Emit the signatures when disassembling a shader. ++ * ++ * \since 1.12 ++ */ ++ VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES = 0x00000020, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), + }; +@@ -208,10 +215,33 @@ enum vkd3d_shader_compile_option_feature_flags + * This corresponds to the "shaderFloat64" feature in the Vulkan API, and + * the "GL_ARB_gpu_shader_fp64" extension in the OpenGL API. */ + VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLOAT64 = 0x00000002, ++ /** The SPIR-V target environment supports wave operations. ++ * This flag is valid only in VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1 ++ * or greater, and corresponds to the following minimum requirements in ++ * VkPhysicalDeviceSubgroupProperties: ++ * - subgroupSize >= 4. 
++ * - supportedOperations has BASIC, VOTE, ARITHMETIC, BALLOT, SHUFFLE and ++ * QUAD bits set. ++ * - supportedStages include COMPUTE and FRAGMENT. \since 1.12 */ ++ VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS = 0x00000004, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLAGS), + }; + ++/** ++ * Flags for vkd3d_shader_parse_dxbc(). ++ * ++ * \since 1.12 ++ */ ++enum vkd3d_shader_parse_dxbc_flags ++{ ++ /** Ignore the checksum and continue parsing even if it is ++ * incorrect. */ ++ VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM = 0x00000001, ++ ++ VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARSE_DXBC_FLAGS), ++}; ++ + enum vkd3d_shader_compile_option_name + { + /** +@@ -279,6 +309,36 @@ enum vkd3d_shader_compile_option_name + * \since 1.11 + */ + VKD3D_SHADER_COMPILE_OPTION_FEATURE = 0x0000000a, ++ /** ++ * If \a value is non-zero compilation will produce a child effect using ++ * shared object descriptions, as instructed by the "shared" modifier. ++ * Child effects are supported with fx_4_0, and fx_4_1 profiles. This option ++ * and "shared" modifiers are ignored for the fx_5_0 profile and non-fx profiles. ++ * The fx_2_0 profile does not have a separate concept of child effects, variables ++ * marked with "shared" modifier will be marked as such in a binary. ++ * ++ * \since 1.12 ++ */ ++ VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT = 0x0000000b, ++ /** ++ * If \a value is nonzero, emit a compile warning warn when vectors or ++ * matrices are truncated in an implicit conversion. ++ * If warnings are disabled, this option has no effect. ++ * This option has no effects for targets other than HLSL. ++ * ++ * The default value is nonzero, i.e. enable implicit truncation warnings. ++ * ++ * \since 1.12 ++ */ ++ VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION = 0x0000000c, ++ /** ++ * If \a value is nonzero, empty constant buffers descriptions are ++ * written out in the output effect binary. 
This option applies only ++ * to fx_4_0 and fx_4_1 profiles and is otherwise ignored. ++ * ++ * \since 1.12 ++ */ ++ VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS = 0x0000000d, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), + }; +@@ -872,6 +932,8 @@ enum vkd3d_shader_spirv_environment + VKD3D_SHADER_SPIRV_ENVIRONMENT_NONE, + VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5, + VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0, /* default target */ ++ /** \since 1.12 */ ++ VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_ENVIRONMENT), + }; +@@ -886,6 +948,8 @@ enum vkd3d_shader_spirv_extension + VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT, + /** \since 1.11 */ + VKD3D_SHADER_SPIRV_EXTENSION_EXT_VIEWPORT_INDEX_LAYER, ++ /** \since 1.12 */ ++ VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_EXTENSION), + }; +@@ -2377,9 +2441,8 @@ VKD3D_SHADER_API void vkd3d_shader_free_dxbc(struct vkd3d_shader_dxbc_desc *dxbc + * + * \param dxbc A vkd3d_shader_code structure containing the DXBC blob to parse. + * +- * \param flags A set of flags modifying the behaviour of the function. No +- * flags are defined for this version of vkd3d-shader, and this parameter +- * should be set to 0. ++ * \param flags A combination of zero or more elements of enum ++ * vkd3d_shader_parse_dxbc_flags. + * + * \param desc A vkd3d_shader_dxbc_desc structure describing the contents of + * the DXBC blob. Its vkd3d_shader_dxbc_section_desc structures will contain +diff --git a/libs/vkd3d/include/vkd3d_types.h b/libs/vkd3d/include/vkd3d_types.h +index 12ceef42fc4..017eaf11806 100644 +--- a/libs/vkd3d/include/vkd3d_types.h ++++ b/libs/vkd3d/include/vkd3d_types.h +@@ -41,6 +41,8 @@ enum vkd3d_result + { + /** Success. */ + VKD3D_OK = 0, ++ /** Success as a result of there being nothing to do. */ ++ VKD3D_FALSE = 1, + /** An unspecified failure occurred. 
*/ + VKD3D_ERROR = -1, + /** There are not enough resources available to complete the operation. */ +diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c +index 06a12ef5bc4..6bc95dc55c4 100644 +--- a/libs/vkd3d/libs/vkd3d-common/blob.c ++++ b/libs/vkd3d/libs/vkd3d-common/blob.c +@@ -17,11 +17,12 @@ + */ + + #define COBJMACROS ++ + #define CONST_VTABLE + #include "vkd3d.h" + #include "vkd3d_blob.h" +-#include "vkd3d_debug.h" + #include "vkd3d_memory.h" ++#include "d3d12shader.h" + + struct vkd3d_blob + { +diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c +index e12cd39450a..4523fc997ef 100644 +--- a/libs/vkd3d/libs/vkd3d-common/debug.c ++++ b/libs/vkd3d/libs/vkd3d-common/debug.c +@@ -20,7 +20,7 @@ + # define _WIN32_WINNT 0x0600 /* For InitOnceExecuteOnce(). */ + #endif + +-#include "vkd3d_debug.h" ++#include "vkd3d_common.h" + + #include + #include +diff --git a/libs/vkd3d/libs/vkd3d-common/error.c b/libs/vkd3d/libs/vkd3d-common/error.c +index 3572669ac1c..b8350a5404c 100644 +--- a/libs/vkd3d/libs/vkd3d-common/error.c ++++ b/libs/vkd3d/libs/vkd3d-common/error.c +@@ -17,7 +17,6 @@ + */ + + #include "vkd3d_common.h" +-#include "vkd3d_debug.h" + + HRESULT hresult_from_vkd3d_result(int vkd3d_result) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index 3f86bd45960..cd8ba0a7d2b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -250,6 +250,7 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_NOT ] = "not", + [VKD3DSIH_NRM ] = "nrm", + [VKD3DSIH_OR ] = "or", ++ [VKD3DSIH_ORD ] = "ord", + [VKD3DSIH_PHASE ] = "phase", + [VKD3DSIH_PHI ] = "phi", + [VKD3DSIH_POW ] = "pow", +@@ -321,6 +322,7 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_UMAX ] = "umax", + [VKD3DSIH_UMIN ] = "umin", + [VKD3DSIH_UMUL ] = "umul", ++ [VKD3DSIH_UNO ] = "uno", + [VKD3DSIH_USHR ] = "ushr", + 
[VKD3DSIH_UTOD ] = "utod", + [VKD3DSIH_UTOF ] = "utof", +@@ -328,37 +330,6 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_XOR ] = "xor", + }; + +-static const struct +-{ +- enum vkd3d_shader_input_sysval_semantic sysval_semantic; +- const char *sysval_name; +-} +-shader_input_sysval_semantic_names[] = +-{ +- {VKD3D_SIV_POSITION, "position"}, +- {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, +- {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, +- {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, +- {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, +- {VKD3D_SIV_VERTEX_ID, "vertex_id"}, +- {VKD3D_SIV_INSTANCE_ID, "instance_id"}, +- {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, +- {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, +- {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, +- {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, +- {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, +- {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, +- {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, +- {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, +- {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, +- {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, +- {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, +- {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, +- {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, +- {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, +- {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, +-}; +- + struct vkd3d_d3d_asm_colours + { + const char *reset; +@@ -370,6 +341,7 @@ struct vkd3d_d3d_asm_colours + const char *swizzle; + const char *version; + const char *write_mask; ++ const char *label; + }; + + struct vkd3d_d3d_asm_compiler +@@ -377,22 +349,10 @@ struct vkd3d_d3d_asm_compiler + struct vkd3d_string_buffer buffer; + struct 
vkd3d_shader_version shader_version; + struct vkd3d_d3d_asm_colours colours; +- enum vsir_asm_dialect dialect; ++ enum vsir_asm_flags flags; + const struct vkd3d_shader_instruction *current; + }; + +-static int VKD3D_PRINTF_FUNC(2, 3) shader_addline(struct vkd3d_string_buffer *buffer, const char *format, ...) +-{ +- va_list args; +- int ret; +- +- va_start(args, format); +- ret = vkd3d_string_buffer_vprintf(buffer, format, args); +- va_end(args); +- +- return ret; +-} +- + /* Convert floating point offset relative to a register file to an absolute + * offset for float constants. */ + static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type register_type, UINT register_idx) +@@ -445,6 +405,23 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, + vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags); + } + ++static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags) ++{ ++ if (atomic_flags & VKD3DARF_SEQ_CST) ++ { ++ vkd3d_string_buffer_printf(&compiler->buffer, "_seqCst"); ++ atomic_flags &= ~VKD3DARF_SEQ_CST; ++ } ++ if (atomic_flags & VKD3DARF_VOLATILE) ++ { ++ vkd3d_string_buffer_printf(&compiler->buffer, "_volatile"); ++ atomic_flags &= ~VKD3DARF_VOLATILE; ++ } ++ ++ if (atomic_flags) ++ vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", atomic_flags); ++} ++ + static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags) + { + if (sync_flags & VKD3DSSF_GLOBAL_UAV) +@@ -511,96 +488,138 @@ static void shader_dump_uav_flags(struct vkd3d_d3d_asm_compiler *compiler, uint3 + vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", uav_flags); + } + +-static void shader_dump_tessellator_domain(struct vkd3d_d3d_asm_compiler *compiler, +- enum vkd3d_tessellator_domain domain) ++static void shader_print_tessellator_domain(struct vkd3d_d3d_asm_compiler *compiler, ++ const 
char *prefix, enum vkd3d_tessellator_domain d, const char *suffix) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; ++ const char *domain; + +- shader_addline(buffer, "domain_"); +- switch (domain) ++ switch (d) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: +- shader_addline(buffer, "isoline"); ++ domain = "domain_isoline"; + break; + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: +- shader_addline(buffer, "tri"); ++ domain = "domain_tri"; + break; + case VKD3D_TESSELLATOR_DOMAIN_QUAD: +- shader_addline(buffer, "quad"); ++ domain = "domain_quad"; + break; + default: +- shader_addline(buffer, "unknown_tessellator_domain(%#x)", domain); +- break; ++ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, d, compiler->colours.reset, suffix); ++ return; + } ++ ++ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, domain, suffix); + } + +-static void shader_dump_tessellator_output_primitive(struct vkd3d_d3d_asm_compiler *compiler, +- enum vkd3d_shader_tessellator_output_primitive output_primitive) ++static void shader_print_tessellator_output_primitive(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, enum vkd3d_shader_tessellator_output_primitive p, const char *suffix) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; ++ const char *primitive; + +- shader_addline(buffer, "output_"); +- switch (output_primitive) ++ switch (p) + { + case VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT: +- shader_addline(buffer, "point"); ++ primitive = "output_point"; + break; + case VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE: +- shader_addline(buffer, "line"); ++ primitive = "output_line"; + break; + case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW: +- shader_addline(buffer, "triangle_cw"); ++ primitive = "output_triangle_cw"; + break; + case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW: +- shader_addline(buffer, "triangle_ccw"); ++ primitive = "output_triangle_ccw"; + break; + default: +- shader_addline(buffer, 
"unknown_tessellator_output_primitive(%#x)", output_primitive); +- break; ++ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, p, compiler->colours.reset, suffix); ++ return; + } ++ ++ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive, suffix); + } + +-static void shader_dump_tessellator_partitioning(struct vkd3d_d3d_asm_compiler *compiler, +- enum vkd3d_shader_tessellator_partitioning partitioning) ++static void shader_print_tessellator_partitioning(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, enum vkd3d_shader_tessellator_partitioning p, const char *suffix) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; ++ const char *partitioning; + +- shader_addline(buffer, "partitioning_"); +- switch (partitioning) ++ switch (p) + { + case VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER: +- shader_addline(buffer, "integer"); ++ partitioning = "partitioning_integer"; + break; + case VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2: +- shader_addline(buffer, "pow2"); ++ partitioning = "partitioning_pow2"; + break; + case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: +- shader_addline(buffer, "fractional_odd"); ++ partitioning = "partitioning_fractional_odd"; + break; + case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: +- shader_addline(buffer, "fractional_even"); ++ partitioning = "partitioning_fractional_even"; + break; + default: +- shader_addline(buffer, "unknown_tessellator_partitioning(%#x)", partitioning); +- break; ++ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, p, compiler->colours.reset, suffix); ++ return; + } ++ ++ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, partitioning, suffix); + } + +-static void shader_dump_shader_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, +- enum vkd3d_shader_input_sysval_semantic semantic) ++static void shader_print_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, ++ const 
char *prefix, enum vkd3d_shader_input_sysval_semantic semantic, const char *suffix) + { + unsigned int i; + ++ static const struct ++ { ++ enum vkd3d_shader_input_sysval_semantic sysval_semantic; ++ const char *sysval_name; ++ } ++ shader_input_sysval_semantic_names[] = ++ { ++ {VKD3D_SIV_POSITION, "position"}, ++ {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, ++ {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, ++ {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, ++ {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, ++ {VKD3D_SIV_VERTEX_ID, "vertex_id"}, ++ {VKD3D_SIV_INSTANCE_ID, "instance_id"}, ++ {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, ++ {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, ++ {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, ++ {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, ++ {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, ++ {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, ++ {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, ++ {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, ++ {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, ++ {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, ++ {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, ++ {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, ++ {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, ++ {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, ++ {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, ++ }; ++ + for (i = 0; i < ARRAY_SIZE(shader_input_sysval_semantic_names); ++i) + { +- if (shader_input_sysval_semantic_names[i].sysval_semantic == semantic) +- { +- vkd3d_string_buffer_printf(&compiler->buffer, "%s", shader_input_sysval_semantic_names[i].sysval_name); +- return; +- } ++ if (shader_input_sysval_semantic_names[i].sysval_semantic != semantic) ++ continue; ++ ++ 
vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", ++ prefix, shader_input_sysval_semantic_names[i].sysval_name, suffix); ++ return; + } + +- vkd3d_string_buffer_printf(&compiler->buffer, "unknown_shader_input_sysval_semantic(%#x)", semantic); ++ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, semantic, compiler->colours.reset, suffix); + } + + static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, enum vkd3d_shader_resource_type type) +@@ -646,6 +665,8 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum + [VKD3D_DATA_UINT8 ] = "uint8", + [VKD3D_DATA_UINT64 ] = "uint64", + [VKD3D_DATA_BOOL ] = "bool", ++ [VKD3D_DATA_UINT16 ] = "uint16", ++ [VKD3D_DATA_HALF ] = "half", + }; + + const char *name; +@@ -673,128 +694,133 @@ static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compil + vkd3d_string_buffer_printf(&compiler->buffer, ")"); + } + +-static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, +- const struct vkd3d_shader_semantic *semantic, uint32_t flags) ++static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, const struct vkd3d_shader_semantic *semantic, uint32_t flags, const char *suffix) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; ++ unsigned int usage_idx; ++ const char *usage; ++ bool indexed; + + if (semantic->resource.reg.reg.type == VKD3DSPR_COMBINED_SAMPLER) + { + switch (semantic->resource_type) + { + case VKD3D_SHADER_RESOURCE_TEXTURE_2D: +- shader_addline(buffer, "_2d"); ++ usage = "2d"; + break; +- + case VKD3D_SHADER_RESOURCE_TEXTURE_3D: +- shader_addline(buffer, "_volume"); ++ usage = "volume"; + break; +- + case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: +- shader_addline(buffer, "_cube"); ++ usage = "cube"; + break; +- + default: +- shader_addline(buffer, "_unknown_resource_type(%#x)", semantic->resource_type); +- break; ++ 
vkd3d_string_buffer_printf(buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, semantic->resource_type, compiler->colours.reset, suffix); ++ return; + } ++ ++ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); ++ return; + } +- else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) ++ ++ if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) + { ++ vkd3d_string_buffer_printf(buffer, "%s", prefix); + if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE) +- shader_addline(buffer, "_resource"); ++ vkd3d_string_buffer_printf(buffer, "resource_"); + +- shader_addline(buffer, "_"); + shader_dump_resource_type(compiler, semantic->resource_type); + if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS + || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) + { +- shader_addline(buffer, "(%u)", semantic->sample_count); ++ vkd3d_string_buffer_printf(buffer, "(%u)", semantic->sample_count); + } + if (semantic->resource.reg.reg.type == VKD3DSPR_UAV) + shader_dump_uav_flags(compiler, flags); +- shader_addline(buffer, " "); ++ vkd3d_string_buffer_printf(buffer, " "); + shader_dump_resource_data_type(compiler, semantic->resource_data_type); ++ vkd3d_string_buffer_printf(buffer, "%s", suffix); ++ return; + } +- else +- { +- /* Pixel shaders 3.0 don't have usage semantics. 
*/ +- if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) +- && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) +- return; +- else +- shader_addline(buffer, "_"); +- +- switch (semantic->usage) +- { +- case VKD3D_DECL_USAGE_POSITION: +- shader_addline(buffer, "position%u", semantic->usage_idx); +- break; +- +- case VKD3D_DECL_USAGE_BLEND_INDICES: +- shader_addline(buffer, "blend"); +- break; +- +- case VKD3D_DECL_USAGE_BLEND_WEIGHT: +- shader_addline(buffer, "weight"); +- break; +- +- case VKD3D_DECL_USAGE_NORMAL: +- shader_addline(buffer, "normal%u", semantic->usage_idx); +- break; +- +- case VKD3D_DECL_USAGE_PSIZE: +- shader_addline(buffer, "psize"); +- break; +- +- case VKD3D_DECL_USAGE_COLOR: +- if (!semantic->usage_idx) +- shader_addline(buffer, "color"); +- else +- shader_addline(buffer, "specular%u", (semantic->usage_idx - 1)); +- break; +- +- case VKD3D_DECL_USAGE_TEXCOORD: +- shader_addline(buffer, "texcoord%u", semantic->usage_idx); +- break; +- +- case VKD3D_DECL_USAGE_TANGENT: +- shader_addline(buffer, "tangent"); +- break; +- +- case VKD3D_DECL_USAGE_BINORMAL: +- shader_addline(buffer, "binormal"); +- break; +- +- case VKD3D_DECL_USAGE_TESS_FACTOR: +- shader_addline(buffer, "tessfactor"); +- break; +- +- case VKD3D_DECL_USAGE_POSITIONT: +- shader_addline(buffer, "positionT%u", semantic->usage_idx); +- break; +- +- case VKD3D_DECL_USAGE_FOG: +- shader_addline(buffer, "fog"); +- break; + +- case VKD3D_DECL_USAGE_DEPTH: +- shader_addline(buffer, "depth"); +- break; ++ /* Pixel shaders 3.0 don't have usage semantics. 
*/ ++ if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) ++ && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) ++ return; + +- case VKD3D_DECL_USAGE_SAMPLE: +- shader_addline(buffer, "sample"); ++ indexed = false; ++ usage_idx = semantic->usage_idx; ++ switch (semantic->usage) ++ { ++ case VKD3D_DECL_USAGE_POSITION: ++ usage = "position"; ++ indexed = true; ++ break; ++ case VKD3D_DECL_USAGE_BLEND_INDICES: ++ usage = "blend"; ++ break; ++ case VKD3D_DECL_USAGE_BLEND_WEIGHT: ++ usage = "weight"; ++ break; ++ case VKD3D_DECL_USAGE_NORMAL: ++ usage = "normal"; ++ indexed = true; ++ break; ++ case VKD3D_DECL_USAGE_PSIZE: ++ usage = "psize"; ++ break; ++ case VKD3D_DECL_USAGE_COLOR: ++ if (semantic->usage_idx) ++ { ++ usage = "specular"; ++ indexed = true; ++ --usage_idx; + break; +- +- default: +- shader_addline(buffer, "", semantic->usage); +- FIXME("Unrecognised semantic usage %#x.\n", semantic->usage); +- } ++ } ++ usage = "color"; ++ break; ++ case VKD3D_DECL_USAGE_TEXCOORD: ++ usage = "texcoord"; ++ indexed = true; ++ break; ++ case VKD3D_DECL_USAGE_TANGENT: ++ usage = "tangent"; ++ break; ++ case VKD3D_DECL_USAGE_BINORMAL: ++ usage = "binormal"; ++ break; ++ case VKD3D_DECL_USAGE_TESS_FACTOR: ++ usage = "tessfactor"; ++ break; ++ case VKD3D_DECL_USAGE_POSITIONT: ++ usage = "positionT"; ++ indexed = true; ++ break; ++ case VKD3D_DECL_USAGE_FOG: ++ usage = "fog"; ++ break; ++ case VKD3D_DECL_USAGE_DEPTH: ++ usage = "depth"; ++ break; ++ case VKD3D_DECL_USAGE_SAMPLE: ++ usage = "sample"; ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, semantic->usage, usage_idx, compiler->colours.reset, suffix); ++ return; + } ++ ++ if (indexed) ++ vkd3d_string_buffer_printf(buffer, "%s%s%u%s", prefix, usage, usage_idx, suffix); ++ else ++ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); + } + +-static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, +- const struct 
vkd3d_shader_src_param *param); ++static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, const struct vkd3d_shader_src_param *param, const char *suffix); + + static void shader_print_float_literal(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, float f, const char *suffix) +@@ -891,13 +917,9 @@ static void shader_print_untyped_literal(struct vkd3d_d3d_asm_compiler *compiler + static void shader_print_subscript(struct vkd3d_d3d_asm_compiler *compiler, + unsigned int offset, const struct vkd3d_shader_src_param *rel_addr) + { +- vkd3d_string_buffer_printf(&compiler->buffer, "["); + if (rel_addr) +- { +- shader_dump_src_param(compiler, rel_addr); +- vkd3d_string_buffer_printf(&compiler->buffer, " + "); +- } +- shader_print_uint_literal(compiler, "", offset, "]"); ++ shader_print_src_param(compiler, "[", rel_addr, " + "); ++ shader_print_uint_literal(compiler, rel_addr ? "" : "[", offset, "]"); + } + + static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler, +@@ -910,8 +932,8 @@ static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler + vkd3d_string_buffer_printf(&compiler->buffer, "*]"); + } + +-static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg, +- bool is_declaration) ++static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, ++ const struct vkd3d_shader_register *reg, bool is_declaration, const char *suffix) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; + unsigned int offset = reg->idx[0].offset; +@@ -920,22 +942,23 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"}; + static const char * const misctype_reg_names[] = {"vPos", "vFace"}; + +- shader_addline(buffer, "%s", compiler->colours.reg); ++ vkd3d_string_buffer_printf(buffer, "%s%s", 
prefix, ++ reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg); + switch (reg->type) + { + case VKD3DSPR_TEMP: +- shader_addline(buffer, "r"); ++ vkd3d_string_buffer_printf(buffer, "r"); + break; + + case VKD3DSPR_INPUT: +- shader_addline(buffer, "v"); ++ vkd3d_string_buffer_printf(buffer, "v"); + break; + + case VKD3DSPR_CONST: + case VKD3DSPR_CONST2: + case VKD3DSPR_CONST3: + case VKD3DSPR_CONST4: +- shader_addline(buffer, "c"); ++ vkd3d_string_buffer_printf(buffer, "c"); + offset = shader_get_float_offset(reg->type, offset); + break; + +@@ -945,205 +968,202 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + break; + + case VKD3DSPR_RASTOUT: +- shader_addline(buffer, "%s", rastout_reg_names[offset]); ++ vkd3d_string_buffer_printf(buffer, "%s", rastout_reg_names[offset]); + break; + + case VKD3DSPR_COLOROUT: +- shader_addline(buffer, "oC"); ++ vkd3d_string_buffer_printf(buffer, "oC"); + break; + + case VKD3DSPR_DEPTHOUT: +- shader_addline(buffer, "oDepth"); ++ vkd3d_string_buffer_printf(buffer, "oDepth"); + break; + + case VKD3DSPR_DEPTHOUTGE: +- shader_addline(buffer, "oDepthGE"); ++ vkd3d_string_buffer_printf(buffer, "oDepthGE"); + break; + + case VKD3DSPR_DEPTHOUTLE: +- shader_addline(buffer, "oDepthLE"); ++ vkd3d_string_buffer_printf(buffer, "oDepthLE"); + break; + + case VKD3DSPR_ATTROUT: +- shader_addline(buffer, "oD"); ++ vkd3d_string_buffer_printf(buffer, "oD"); + break; + + case VKD3DSPR_TEXCRDOUT: + /* Vertex shaders >= 3.0 use general purpose output registers + * (VKD3DSPR_OUTPUT), which can include an address token. 
*/ + if (vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0)) +- shader_addline(buffer, "o"); ++ vkd3d_string_buffer_printf(buffer, "o"); + else +- shader_addline(buffer, "oT"); ++ vkd3d_string_buffer_printf(buffer, "oT"); + break; + + case VKD3DSPR_CONSTINT: +- shader_addline(buffer, "i"); ++ vkd3d_string_buffer_printf(buffer, "i"); + break; + + case VKD3DSPR_CONSTBOOL: +- shader_addline(buffer, "b"); ++ vkd3d_string_buffer_printf(buffer, "b"); + break; + + case VKD3DSPR_LABEL: +- shader_addline(buffer, "l"); ++ vkd3d_string_buffer_printf(buffer, "l"); + break; + + case VKD3DSPR_LOOP: +- shader_addline(buffer, "aL"); ++ vkd3d_string_buffer_printf(buffer, "aL"); + break; + + case VKD3DSPR_COMBINED_SAMPLER: + case VKD3DSPR_SAMPLER: +- shader_addline(buffer, "s"); ++ vkd3d_string_buffer_printf(buffer, "s"); + is_descriptor = true; + break; + + case VKD3DSPR_MISCTYPE: + if (offset > 1) +- { +- FIXME("Unhandled misctype register %u.\n", offset); +- shader_addline(buffer, "", offset); +- } ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, offset, compiler->colours.reset); + else +- { +- shader_addline(buffer, "%s", misctype_reg_names[offset]); +- } ++ vkd3d_string_buffer_printf(buffer, "%s", misctype_reg_names[offset]); + break; + + case VKD3DSPR_PREDICATE: +- shader_addline(buffer, "p"); ++ vkd3d_string_buffer_printf(buffer, "p"); + break; + + case VKD3DSPR_IMMCONST: +- shader_addline(buffer, "l"); ++ vkd3d_string_buffer_printf(buffer, "l"); + break; + + case VKD3DSPR_IMMCONST64: +- shader_addline(buffer, "d"); ++ vkd3d_string_buffer_printf(buffer, "d"); + break; + + case VKD3DSPR_CONSTBUFFER: +- shader_addline(buffer, "cb"); ++ vkd3d_string_buffer_printf(buffer, "cb"); + is_descriptor = true; + break; + + case VKD3DSPR_IMMCONSTBUFFER: +- shader_addline(buffer, "icb"); ++ vkd3d_string_buffer_printf(buffer, "icb"); + break; + + case VKD3DSPR_PRIMID: +- shader_addline(buffer, "primID"); ++ vkd3d_string_buffer_printf(buffer, "primID"); + break; + 
+ case VKD3DSPR_NULL: +- shader_addline(buffer, "null"); ++ vkd3d_string_buffer_printf(buffer, "null"); + break; + + case VKD3DSPR_RASTERIZER: +- shader_addline(buffer, "rasterizer"); ++ vkd3d_string_buffer_printf(buffer, "rasterizer"); + break; + + case VKD3DSPR_RESOURCE: +- shader_addline(buffer, "t"); ++ vkd3d_string_buffer_printf(buffer, "t"); + is_descriptor = true; + break; + + case VKD3DSPR_UAV: +- shader_addline(buffer, "u"); ++ vkd3d_string_buffer_printf(buffer, "u"); + is_descriptor = true; + break; + + case VKD3DSPR_OUTPOINTID: +- shader_addline(buffer, "vOutputControlPointID"); ++ vkd3d_string_buffer_printf(buffer, "vOutputControlPointID"); + break; + + case VKD3DSPR_FORKINSTID: +- shader_addline(buffer, "vForkInstanceId"); ++ vkd3d_string_buffer_printf(buffer, "vForkInstanceId"); + break; + + case VKD3DSPR_JOININSTID: +- shader_addline(buffer, "vJoinInstanceId"); ++ vkd3d_string_buffer_printf(buffer, "vJoinInstanceId"); + break; + + case VKD3DSPR_INCONTROLPOINT: +- shader_addline(buffer, "vicp"); ++ vkd3d_string_buffer_printf(buffer, "vicp"); + break; + + case VKD3DSPR_OUTCONTROLPOINT: +- shader_addline(buffer, "vocp"); ++ vkd3d_string_buffer_printf(buffer, "vocp"); + break; + + case VKD3DSPR_PATCHCONST: +- shader_addline(buffer, "vpc"); ++ vkd3d_string_buffer_printf(buffer, "vpc"); + break; + + case VKD3DSPR_TESSCOORD: +- shader_addline(buffer, "vDomainLocation"); ++ vkd3d_string_buffer_printf(buffer, "vDomainLocation"); + break; + + case VKD3DSPR_GROUPSHAREDMEM: +- shader_addline(buffer, "g"); ++ vkd3d_string_buffer_printf(buffer, "g"); + break; + + case VKD3DSPR_THREADID: +- shader_addline(buffer, "vThreadID"); ++ vkd3d_string_buffer_printf(buffer, "vThreadID"); + break; + + case VKD3DSPR_THREADGROUPID: +- shader_addline(buffer, "vThreadGroupID"); ++ vkd3d_string_buffer_printf(buffer, "vThreadGroupID"); + break; + + case VKD3DSPR_LOCALTHREADID: +- shader_addline(buffer, "vThreadIDInGroup"); ++ vkd3d_string_buffer_printf(buffer, "vThreadIDInGroup"); 
+ break; + + case VKD3DSPR_LOCALTHREADINDEX: +- shader_addline(buffer, "vThreadIDInGroupFlattened"); ++ vkd3d_string_buffer_printf(buffer, "vThreadIDInGroupFlattened"); + break; + + case VKD3DSPR_IDXTEMP: +- shader_addline(buffer, "x"); ++ vkd3d_string_buffer_printf(buffer, "x"); + break; + + case VKD3DSPR_STREAM: +- shader_addline(buffer, "m"); ++ vkd3d_string_buffer_printf(buffer, "m"); + break; + + case VKD3DSPR_FUNCTIONBODY: +- shader_addline(buffer, "fb"); ++ vkd3d_string_buffer_printf(buffer, "fb"); + break; + + case VKD3DSPR_FUNCTIONPOINTER: +- shader_addline(buffer, "fp"); ++ vkd3d_string_buffer_printf(buffer, "fp"); + break; + + case VKD3DSPR_COVERAGE: +- shader_addline(buffer, "vCoverage"); ++ vkd3d_string_buffer_printf(buffer, "vCoverage"); + break; + + case VKD3DSPR_SAMPLEMASK: +- shader_addline(buffer, "oMask"); ++ vkd3d_string_buffer_printf(buffer, "oMask"); + break; + + case VKD3DSPR_GSINSTID: +- shader_addline(buffer, "vGSInstanceID"); ++ vkd3d_string_buffer_printf(buffer, "vGSInstanceID"); + break; + + case VKD3DSPR_OUTSTENCILREF: +- shader_addline(buffer, "oStencilRef"); ++ vkd3d_string_buffer_printf(buffer, "oStencilRef"); + break; + + case VKD3DSPR_UNDEF: +- shader_addline(buffer, "undef"); ++ vkd3d_string_buffer_printf(buffer, "undef"); + break; + + case VKD3DSPR_SSA: +- shader_addline(buffer, "sr"); ++ vkd3d_string_buffer_printf(buffer, "sr"); + break; + + default: +- shader_addline(buffer, "", reg->type); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->type, compiler->colours.reset); + break; + } + +@@ -1162,7 +1182,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + break; + } + +- shader_addline(buffer, "%s(", compiler->colours.reset); ++ vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); + switch (reg->dimension) + { + case VSIR_DIMENSION_SCALAR: +@@ -1183,7 +1203,8 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + 
shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); + break; + default: +- shader_addline(buffer, "", reg->data_type); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->data_type, compiler->colours.reset); + break; + } + break; +@@ -1222,20 +1243,22 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[3], ""); + break; + default: +- shader_addline(buffer, "", reg->data_type); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->data_type, compiler->colours.reset); + break; + } + break; + + default: +- shader_addline(buffer, "", reg->dimension); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->dimension, compiler->colours.reset); + break; + } +- shader_addline(buffer, ")"); ++ vkd3d_string_buffer_printf(buffer, ")"); + } + else if (reg->type == VKD3DSPR_IMMCONST64) + { +- shader_addline(buffer, "%s(", compiler->colours.reset); ++ vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); + /* A double2 vector is treated as a float4 vector in enum vsir_dimension. 
*/ + if (reg->dimension == VSIR_DIMENSION_SCALAR || reg->dimension == VSIR_DIMENSION_VEC4) + { +@@ -1253,14 +1276,16 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + } + else + { +- shader_addline(buffer, "", reg->data_type); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->data_type, compiler->colours.reset); + } + } + else + { +- shader_addline(buffer, "", reg->dimension); ++ vkd3d_string_buffer_printf(buffer, "%s%s", ++ compiler->colours.error, reg->dimension, compiler->colours.reset); + } +- shader_addline(buffer, ")"); ++ vkd3d_string_buffer_printf(buffer, ")"); + } + else if (reg->type != VKD3DSPR_RASTOUT + && reg->type != VKD3DSPR_MISCTYPE +@@ -1304,7 +1329,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + } + else + { +- shader_addline(buffer, "%s", compiler->colours.reset); ++ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); + } + + if (reg->type == VKD3DSPR_FUNCTIONPOINTER) +@@ -1312,8 +1337,9 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const + } + else + { +- shader_addline(buffer, "%s", compiler->colours.reset); ++ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); + } ++ vkd3d_string_buffer_printf(buffer, "%s", suffix); + } + + static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg) +@@ -1357,8 +1383,8 @@ static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, co + compiler->colours.modifier, compiler->colours.reset); + } + +-static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, +- const struct vkd3d_shader_register *reg) ++static void shader_print_reg_type(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, const struct vkd3d_shader_register *reg, const char *suffix) + { + static const char *dimensions[] = + { +@@ -1370,7 +1396,13 @@ static void 
shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, + struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *dimension; + +- if (compiler->dialect != VSIR_ASM_VSIR) ++ if (!(compiler->flags & VSIR_ASM_FLAG_DUMP_TYPES)) ++ { ++ vkd3d_string_buffer_printf(buffer, "%s%s", prefix, suffix); ++ return; ++ } ++ ++ if (reg->data_type == VKD3D_DATA_UNUSED) + return; + + if (reg->dimension < ARRAY_SIZE(dimensions)) +@@ -1378,83 +1410,114 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, + else + dimension = "??"; + +- shader_addline(buffer, " <%s", dimension); ++ vkd3d_string_buffer_printf(buffer, "%s <%s", prefix, dimension); + shader_dump_data_type(compiler, reg->data_type); +- shader_addline(buffer, ">"); ++ vkd3d_string_buffer_printf(buffer, ">%s", suffix); + } + +-static void shader_dump_dst_param(struct vkd3d_d3d_asm_compiler *compiler, +- const struct vkd3d_shader_dst_param *param, bool is_declaration) ++static void shader_print_write_mask(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, uint32_t mask, const char *suffix) ++{ ++ unsigned int i = 0; ++ char buffer[5]; ++ ++ if (mask == 0) ++ { ++ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s", prefix, suffix); ++ return; ++ } ++ ++ if (mask & VKD3DSP_WRITEMASK_0) ++ buffer[i++] = 'x'; ++ if (mask & VKD3DSP_WRITEMASK_1) ++ buffer[i++] = 'y'; ++ if (mask & VKD3DSP_WRITEMASK_2) ++ buffer[i++] = 'z'; ++ if (mask & VKD3DSP_WRITEMASK_3) ++ buffer[i++] = 'w'; ++ buffer[i++] = '\0'; ++ ++ vkd3d_string_buffer_printf(&compiler->buffer, "%s.%s%s%s%s", prefix, ++ compiler->colours.write_mask, buffer, compiler->colours.reset, suffix); ++} ++ ++static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, const struct vkd3d_shader_dst_param *param, bool is_declaration, const char *suffix) + { +- struct vkd3d_string_buffer *buffer = &compiler->buffer; + uint32_t write_mask = param->write_mask; + +- 
shader_dump_register(compiler, ¶m->reg, is_declaration); ++ shader_print_register(compiler, prefix, ¶m->reg, is_declaration, ""); + + if (write_mask && param->reg.dimension == VSIR_DIMENSION_VEC4) + { +- static const char write_mask_chars[] = "xyzw"; +- + if (data_type_is_64_bit(param->reg.data_type)) + write_mask = vsir_write_mask_32_from_64(write_mask); + +- shader_addline(buffer, ".%s", compiler->colours.write_mask); +- if (write_mask & VKD3DSP_WRITEMASK_0) +- shader_addline(buffer, "%c", write_mask_chars[0]); +- if (write_mask & VKD3DSP_WRITEMASK_1) +- shader_addline(buffer, "%c", write_mask_chars[1]); +- if (write_mask & VKD3DSP_WRITEMASK_2) +- shader_addline(buffer, "%c", write_mask_chars[2]); +- if (write_mask & VKD3DSP_WRITEMASK_3) +- shader_addline(buffer, "%c", write_mask_chars[3]); +- shader_addline(buffer, "%s", compiler->colours.reset); ++ shader_print_write_mask(compiler, "", write_mask, ""); + } + + shader_print_precision(compiler, ¶m->reg); + shader_print_non_uniform(compiler, ¶m->reg); +- shader_dump_reg_type(compiler, ¶m->reg); ++ shader_print_reg_type(compiler, "", ¶m->reg, suffix); + } + +-static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, +- const struct vkd3d_shader_src_param *param) ++static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, const struct vkd3d_shader_src_param *param, const char *suffix) + { + enum vkd3d_shader_src_modifier src_modifier = param->modifiers; + struct vkd3d_string_buffer *buffer = &compiler->buffer; + uint32_t swizzle = param->swizzle; ++ const char *modifier = ""; ++ bool is_abs = false; + + if (src_modifier == VKD3DSPSM_NEG + || src_modifier == VKD3DSPSM_BIASNEG + || src_modifier == VKD3DSPSM_SIGNNEG + || src_modifier == VKD3DSPSM_X2NEG + || src_modifier == VKD3DSPSM_ABSNEG) +- shader_addline(buffer, "-"); ++ modifier = "-"; + else if (src_modifier == VKD3DSPSM_COMP) +- shader_addline(buffer, "1-"); ++ modifier = "1-"; + else if (src_modifier == 
VKD3DSPSM_NOT) +- shader_addline(buffer, "!"); ++ modifier = "!"; ++ vkd3d_string_buffer_printf(buffer, "%s%s", prefix, modifier); + + if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) +- shader_addline(buffer, "|"); ++ is_abs = true; + +- shader_dump_register(compiler, ¶m->reg, false); ++ shader_print_register(compiler, is_abs ? "|" : "", ¶m->reg, false, ""); + + switch (src_modifier) + { +- case VKD3DSPSM_NONE: break; +- case VKD3DSPSM_NEG: break; +- case VKD3DSPSM_NOT: break; +- case VKD3DSPSM_BIAS: shader_addline(buffer, "_bias"); break; +- case VKD3DSPSM_BIASNEG: shader_addline(buffer, "_bias"); break; +- case VKD3DSPSM_SIGN: shader_addline(buffer, "_bx2"); break; +- case VKD3DSPSM_SIGNNEG: shader_addline(buffer, "_bx2"); break; +- case VKD3DSPSM_COMP: break; +- case VKD3DSPSM_X2: shader_addline(buffer, "_x2"); break; +- case VKD3DSPSM_X2NEG: shader_addline(buffer, "_x2"); break; +- case VKD3DSPSM_DZ: shader_addline(buffer, "_dz"); break; +- case VKD3DSPSM_DW: shader_addline(buffer, "_dw"); break; ++ case VKD3DSPSM_NONE: ++ case VKD3DSPSM_NEG: ++ case VKD3DSPSM_COMP: ++ case VKD3DSPSM_ABS: + case VKD3DSPSM_ABSNEG: +- case VKD3DSPSM_ABS: /* handled later */ break; +- default: shader_addline(buffer, "_unknown_modifier(%#x)", src_modifier); ++ case VKD3DSPSM_NOT: ++ break; ++ case VKD3DSPSM_BIAS: ++ case VKD3DSPSM_BIASNEG: ++ vkd3d_string_buffer_printf(buffer, "_bias"); ++ break; ++ case VKD3DSPSM_SIGN: ++ case VKD3DSPSM_SIGNNEG: ++ vkd3d_string_buffer_printf(buffer, "_bx2"); ++ break; ++ case VKD3DSPSM_X2: ++ case VKD3DSPSM_X2NEG: ++ vkd3d_string_buffer_printf(buffer, "_x2"); ++ break; ++ case VKD3DSPSM_DZ: ++ vkd3d_string_buffer_printf(buffer, "_dz"); ++ break; ++ case VKD3DSPSM_DW: ++ vkd3d_string_buffer_printf(buffer, "_dw"); ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, "_%s%s", ++ compiler->colours.error, src_modifier, compiler->colours.reset); ++ break; + } + + if (param->reg.type != VKD3DSPR_IMMCONST && param->reg.type 
!= VKD3DSPR_IMMCONST64 +@@ -1472,26 +1535,21 @@ static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, + swizzle_z = vsir_swizzle_get_component(swizzle, 2); + swizzle_w = vsir_swizzle_get_component(swizzle, 3); + +- if (swizzle_x == swizzle_y +- && swizzle_x == swizzle_z +- && swizzle_x == swizzle_w) +- { +- shader_addline(buffer, ".%s%c%s", compiler->colours.swizzle, ++ if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) ++ vkd3d_string_buffer_printf(buffer, ".%s%c%s", compiler->colours.swizzle, + swizzle_chars[swizzle_x], compiler->colours.reset); +- } + else +- { +- shader_addline(buffer, ".%s%c%c%c%c%s", compiler->colours.swizzle, ++ vkd3d_string_buffer_printf(buffer, ".%s%c%c%c%c%s", compiler->colours.swizzle, + swizzle_chars[swizzle_x], swizzle_chars[swizzle_y], + swizzle_chars[swizzle_z], swizzle_chars[swizzle_w], compiler->colours.reset); +- } + } +- if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) +- shader_addline(buffer, "|"); ++ ++ if (is_abs) ++ vkd3d_string_buffer_printf(buffer, "|"); + + shader_print_precision(compiler, ¶m->reg); + shader_print_non_uniform(compiler, ¶m->reg); +- shader_dump_reg_type(compiler, ¶m->reg); ++ shader_print_reg_type(compiler, "", ¶m->reg, suffix); + } + + static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1502,105 +1560,129 @@ static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, + + switch (dst->shift) + { +- case 0: break; +- case 13: shader_addline(buffer, "_d8"); break; +- case 14: shader_addline(buffer, "_d4"); break; +- case 15: shader_addline(buffer, "_d2"); break; +- case 1: shader_addline(buffer, "_x2"); break; +- case 2: shader_addline(buffer, "_x4"); break; +- case 3: shader_addline(buffer, "_x8"); break; +- default: shader_addline(buffer, "_unhandled_shift(%d)", dst->shift); break; ++ case 0: ++ break; ++ case 13: ++ vkd3d_string_buffer_printf(buffer, "_d8"); ++ break; ++ case 14: ++ 
vkd3d_string_buffer_printf(buffer, "_d4"); ++ break; ++ case 15: ++ vkd3d_string_buffer_printf(buffer, "_d2"); ++ break; ++ case 1: ++ vkd3d_string_buffer_printf(buffer, "_x2"); ++ break; ++ case 2: ++ vkd3d_string_buffer_printf(buffer, "_x4"); ++ break; ++ case 3: ++ vkd3d_string_buffer_printf(buffer, "_x8"); ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, "_unhandled_shift(%d)", dst->shift); ++ break; + } + +- if (mmask & VKD3DSPDM_SATURATE) shader_addline(buffer, "_sat"); +- if (mmask & VKD3DSPDM_PARTIALPRECISION) shader_addline(buffer, "_pp"); +- if (mmask & VKD3DSPDM_MSAMPCENTROID) shader_addline(buffer, "_centroid"); ++ if (mmask & VKD3DSPDM_SATURATE) ++ vkd3d_string_buffer_printf(buffer, "_sat"); ++ if (mmask & VKD3DSPDM_PARTIALPRECISION) ++ vkd3d_string_buffer_printf(buffer, "_pp"); ++ if (mmask & VKD3DSPDM_MSAMPCENTROID) ++ vkd3d_string_buffer_printf(buffer, "_centroid"); + + mmask &= ~VKD3DSPDM_MASK; + if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask); + } + +-static void shader_dump_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, +- const struct vkd3d_shader_primitive_type *primitive_type) ++static void shader_print_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, const struct vkd3d_shader_primitive_type *p, const char *suffix) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; ++ const char *primitive_type; + +- switch (primitive_type->type) ++ switch (p->type) + { + case VKD3D_PT_UNDEFINED: +- shader_addline(buffer, "undefined"); ++ primitive_type = "undefined"; + break; + case VKD3D_PT_POINTLIST: +- shader_addline(buffer, "pointlist"); ++ primitive_type = "pointlist"; + break; + case VKD3D_PT_LINELIST: +- shader_addline(buffer, "linelist"); ++ primitive_type = "linelist"; + break; + case VKD3D_PT_LINESTRIP: +- shader_addline(buffer, "linestrip"); ++ primitive_type = "linestrip"; + break; + case VKD3D_PT_TRIANGLELIST: +- shader_addline(buffer, "trianglelist"); ++ primitive_type = 
"trianglelist"; + break; + case VKD3D_PT_TRIANGLESTRIP: +- shader_addline(buffer, "trianglestrip"); ++ primitive_type = "trianglestrip"; + break; + case VKD3D_PT_TRIANGLEFAN: +- shader_addline(buffer, "trianglefan"); ++ primitive_type = "trianglefan"; + break; + case VKD3D_PT_LINELIST_ADJ: +- shader_addline(buffer, "linelist_adj"); ++ primitive_type = "linelist_adj"; + break; + case VKD3D_PT_LINESTRIP_ADJ: +- shader_addline(buffer, "linestrip_adj"); ++ primitive_type = "linestrip_adj"; + break; + case VKD3D_PT_TRIANGLELIST_ADJ: +- shader_addline(buffer, "trianglelist_adj"); ++ primitive_type = "trianglelist_adj"; + break; + case VKD3D_PT_TRIANGLESTRIP_ADJ: +- shader_addline(buffer, "trianglestrip_adj"); ++ primitive_type = "trianglestrip_adj"; + break; + case VKD3D_PT_PATCH: +- shader_addline(buffer, "patch%u", primitive_type->patch_vertex_count); +- break; ++ vkd3d_string_buffer_printf(buffer, "%spatch%u%s", prefix, p->patch_vertex_count, suffix); ++ return; + default: +- shader_addline(buffer, "", primitive_type->type); +- break; ++ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, p->type, compiler->colours.reset, suffix); ++ return; + } ++ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive_type, suffix); + } + +-static void shader_dump_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, +- enum vkd3d_shader_interpolation_mode interpolation_mode) ++static void shader_print_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *prefix, enum vkd3d_shader_interpolation_mode m, const char *suffix) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; ++ const char *mode; + +- switch (interpolation_mode) ++ switch (m) + { + case VKD3DSIM_CONSTANT: +- shader_addline(buffer, "constant"); ++ mode = "constant"; + break; + case VKD3DSIM_LINEAR: +- shader_addline(buffer, "linear"); ++ mode = "linear"; + break; + case VKD3DSIM_LINEAR_CENTROID: +- shader_addline(buffer, "linear centroid"); ++ 
mode = "linear centroid"; + break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE: +- shader_addline(buffer, "linear noperspective"); ++ mode = "linear noperspective"; + break; + case VKD3DSIM_LINEAR_SAMPLE: +- shader_addline(buffer, "linear sample"); ++ mode = "linear sample"; + break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID: +- shader_addline(buffer, "linear noperspective centroid"); ++ mode = "linear noperspective centroid"; + break; + case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: +- shader_addline(buffer, "linear noperspective sample"); ++ mode = "linear noperspective sample"; + break; + default: +- shader_addline(buffer, "", interpolation_mode); +- break; ++ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", ++ prefix, compiler->colours.error, m, compiler->colours.reset, suffix); ++ return; + } ++ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, mode, suffix); + } + + const char *shader_get_type_prefix(enum vkd3d_shader_type type) +@@ -1654,9 +1736,15 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile + case VKD3DSIH_RETP: + switch (ins->flags) + { +- case VKD3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(buffer, "_nz"); break; +- case VKD3D_SHADER_CONDITIONAL_OP_Z: shader_addline(buffer, "_z"); break; +- default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); break; ++ case VKD3D_SHADER_CONDITIONAL_OP_NZ: ++ vkd3d_string_buffer_printf(buffer, "_nz"); ++ break; ++ case VKD3D_SHADER_CONDITIONAL_OP_Z: ++ vkd3d_string_buffer_printf(buffer, "_z"); ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); ++ break; + } + break; + +@@ -1664,49 +1752,88 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile + case VKD3DSIH_BREAKC: + switch (ins->flags) + { +- case VKD3D_SHADER_REL_OP_GT: shader_addline(buffer, "_gt"); break; +- case VKD3D_SHADER_REL_OP_EQ: shader_addline(buffer, "_eq"); break; +- case VKD3D_SHADER_REL_OP_GE: shader_addline(buffer, "_ge"); break; +- 
case VKD3D_SHADER_REL_OP_LT: shader_addline(buffer, "_lt"); break; +- case VKD3D_SHADER_REL_OP_NE: shader_addline(buffer, "_ne"); break; +- case VKD3D_SHADER_REL_OP_LE: shader_addline(buffer, "_le"); break; +- default: shader_addline(buffer, "_(%u)", ins->flags); ++ case VKD3D_SHADER_REL_OP_GT: ++ vkd3d_string_buffer_printf(buffer, "_gt"); ++ break; ++ case VKD3D_SHADER_REL_OP_EQ: ++ vkd3d_string_buffer_printf(buffer, "_eq"); ++ break; ++ case VKD3D_SHADER_REL_OP_GE: ++ vkd3d_string_buffer_printf(buffer, "_ge"); ++ break; ++ case VKD3D_SHADER_REL_OP_LT: ++ vkd3d_string_buffer_printf(buffer, "_lt"); ++ break; ++ case VKD3D_SHADER_REL_OP_NE: ++ vkd3d_string_buffer_printf(buffer, "_ne"); ++ break; ++ case VKD3D_SHADER_REL_OP_LE: ++ vkd3d_string_buffer_printf(buffer, "_le"); ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, "_(%u)", ins->flags); ++ break; + } + break; + + case VKD3DSIH_RESINFO: + switch (ins->flags) + { +- case VKD3DSI_NONE: break; +- case VKD3DSI_RESINFO_RCP_FLOAT: shader_addline(buffer, "_rcpFloat"); break; +- case VKD3DSI_RESINFO_UINT: shader_addline(buffer, "_uint"); break; +- default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); ++ case VKD3DSI_NONE: ++ break; ++ case VKD3DSI_RESINFO_RCP_FLOAT: ++ vkd3d_string_buffer_printf(buffer, "_rcpFloat"); ++ break; ++ case VKD3DSI_RESINFO_UINT: ++ vkd3d_string_buffer_printf(buffer, "_uint"); ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); ++ break; + } + break; + + case VKD3DSIH_SAMPLE_INFO: + switch (ins->flags) + { +- case VKD3DSI_NONE: break; +- case VKD3DSI_SAMPLE_INFO_UINT: shader_addline(buffer, "_uint"); break; +- default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); ++ case VKD3DSI_NONE: ++ break; ++ case VKD3DSI_SAMPLE_INFO_UINT: ++ vkd3d_string_buffer_printf(buffer, "_uint"); ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); ++ break; + } + break; + ++ case 
VKD3DSIH_IMM_ATOMIC_CMP_EXCH: ++ case VKD3DSIH_IMM_ATOMIC_IADD: ++ case VKD3DSIH_IMM_ATOMIC_AND: ++ case VKD3DSIH_IMM_ATOMIC_IMAX: ++ case VKD3DSIH_IMM_ATOMIC_IMIN: ++ case VKD3DSIH_IMM_ATOMIC_OR: ++ case VKD3DSIH_IMM_ATOMIC_UMAX: ++ case VKD3DSIH_IMM_ATOMIC_UMIN: ++ case VKD3DSIH_IMM_ATOMIC_EXCH: ++ case VKD3DSIH_IMM_ATOMIC_XOR: ++ shader_dump_atomic_op_flags(compiler, ins->flags); ++ break; ++ + case VKD3DSIH_SYNC: + shader_dump_sync_flags(compiler, ins->flags); + break; + + case VKD3DSIH_TEX: + if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) +- shader_addline(buffer, "p"); ++ vkd3d_string_buffer_printf(buffer, "p"); + break; + + case VKD3DSIH_ISHL: + case VKD3DSIH_ISHR: + case VKD3DSIH_USHR: + if (ins->flags & VKD3DSI_SHIFT_UNMASKED) +- shader_addline(buffer, "_unmasked"); ++ vkd3d_string_buffer_printf(buffer, "_unmasked"); + /* fall through */ + default: + shader_dump_precise_flags(compiler, ins->flags); +@@ -1753,7 +1880,7 @@ static void shader_dump_icb(struct vkd3d_d3d_asm_compiler *compiler, + shader_print_hex_literal(compiler, ", ", icb->data[4 * i + 3], "},\n"); + } + } +- shader_addline(buffer, "}"); ++ vkd3d_string_buffer_printf(buffer, "}"); + } + + static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1765,11 +1892,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + compiler->current = ins; + + if (ins->predicate) +- { +- vkd3d_string_buffer_printf(buffer, "("); +- shader_dump_src_param(compiler, ins->predicate); +- vkd3d_string_buffer_printf(buffer, ") "); +- } ++ shader_print_src_param(compiler, "(", ins->predicate, ") "); + + /* PixWin marks instructions with the coissue flag with a '+' */ + if (ins->coissue) +@@ -1782,21 +1905,20 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_UAV_TYPED: + vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.opcode); +- 
shader_dump_decl_usage(compiler, &ins->declaration.semantic, ins->flags); ++ shader_print_dcl_usage(compiler, "_", &ins->declaration.semantic, ins->flags, ""); + shader_dump_ins_modifiers(compiler, &ins->declaration.semantic.resource.reg); +- vkd3d_string_buffer_printf(buffer, "%s ", compiler->colours.reset); +- shader_dump_register(compiler, &ins->declaration.semantic.resource.reg.reg, true); ++ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); ++ shader_print_register(compiler, " ", &ins->declaration.semantic.resource.reg.reg, true, ""); + shader_dump_register_space(compiler, ins->declaration.semantic.resource.range.space); + break; + + case VKD3DSIH_DCL_CONSTANT_BUFFER: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_register(compiler, &ins->declaration.cb.src.reg, true); ++ shader_print_register(compiler, " ", &ins->declaration.cb.src.reg, true, ""); + if (vkd3d_shader_ver_ge(&compiler->shader_version, 6, 0)) + shader_print_subscript(compiler, ins->declaration.cb.size, NULL); + else if (vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1)) + shader_print_subscript(compiler, ins->declaration.cb.size / VKD3D_VEC4_SIZE / sizeof(float), NULL); +- shader_addline(buffer, ", %s", ++ vkd3d_string_buffer_printf(buffer, ", %s", + ins->flags & VKD3DSI_INDEXED_DYNAMIC ? 
"dynamicIndexed" : "immediateIndexed"); + shader_dump_register_space(compiler, ins->declaration.cb.range.space); + break; +@@ -1823,8 +1945,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + break; + + case VKD3DSIH_DCL_INDEX_RANGE: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_dst_param(compiler, &ins->declaration.index_range.dst, true); ++ shader_print_dst_param(compiler, " ", &ins->declaration.index_range.dst, true, ""); + shader_print_uint_literal(compiler, " ", ins->declaration.index_range.register_count, ""); + break; + +@@ -1840,41 +1961,32 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + break; + + case VKD3DSIH_DCL_INPUT_PS: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_interpolation_mode(compiler, ins->flags); +- shader_addline(buffer, " "); +- shader_dump_dst_param(compiler, &ins->declaration.dst, true); ++ shader_print_interpolation_mode(compiler, " ", ins->flags, ""); ++ shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); + break; + + case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_INPUT_SIV: + case VKD3DSIH_DCL_OUTPUT_SIV: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); +- shader_addline(buffer, ", "); +- shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); ++ shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); ++ shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); + break; + + case VKD3DSIH_DCL_INPUT_PS_SIV: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_interpolation_mode(compiler, ins->flags); +- shader_addline(buffer, " "); +- shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); +- shader_addline(buffer, ", "); +- 
shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); ++ shader_print_interpolation_mode(compiler, " ", ins->flags, ""); ++ shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); ++ shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); + break; + + case VKD3DSIH_DCL_INPUT: + case VKD3DSIH_DCL_OUTPUT: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_dst_param(compiler, &ins->declaration.dst, true); ++ shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); + break; + + case VKD3DSIH_DCL_INPUT_PRIMITIVE: + case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_primitive_type(compiler, &ins->declaration.primitive_type); ++ shader_print_primitive_type(compiler, " ", &ins->declaration.primitive_type, ""); + break; + + case VKD3DSIH_DCL_INTERFACE: +@@ -1885,23 +1997,19 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + break; + + case VKD3DSIH_DCL_RESOURCE_RAW: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); ++ shader_print_dst_param(compiler, " ", &ins->declaration.raw_resource.resource.reg, true, ""); + shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); + break; + + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_dst_param(compiler, &ins->declaration.structured_resource.resource.reg, true); ++ shader_print_dst_param(compiler, " ", &ins->declaration.structured_resource.resource.reg, true, ""); + shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); + shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); + break; + + case VKD3DSIH_DCL_SAMPLER: +- vkd3d_string_buffer_printf(buffer, " "); 
+- shader_dump_register(compiler, &ins->declaration.sampler.src.reg, true); +- if (ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE) +- shader_addline(buffer, ", comparisonMode"); ++ shader_print_register(compiler, " ", &ins->declaration.sampler.src.reg, true, ++ ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE ? ", comparisonMode" : ""); + shader_dump_register_space(compiler, ins->declaration.sampler.range.space); + break; + +@@ -1916,29 +2024,24 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + break; + + case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_tessellator_domain(compiler, ins->declaration.tessellator_domain); ++ shader_print_tessellator_domain(compiler, " ", ins->declaration.tessellator_domain, ""); + break; + + case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_tessellator_output_primitive(compiler, ins->declaration.tessellator_output_primitive); ++ shader_print_tessellator_output_primitive(compiler, " ", ins->declaration.tessellator_output_primitive, ""); + break; + + case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_tessellator_partitioning(compiler, ins->declaration.tessellator_partitioning); ++ shader_print_tessellator_partitioning(compiler, " ", ins->declaration.tessellator_partitioning, ""); + break; + + case VKD3DSIH_DCL_TGSM_RAW: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_dst_param(compiler, &ins->declaration.tgsm_raw.reg, true); ++ shader_print_dst_param(compiler, " ", &ins->declaration.tgsm_raw.reg, true, ""); + shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_raw.byte_count, ""); + break; + + case VKD3DSIH_DCL_TGSM_STRUCTURED: +- vkd3d_string_buffer_printf(buffer, " "); +- shader_dump_dst_param(compiler, &ins->declaration.tgsm_structured.reg, true); ++ shader_print_dst_param(compiler, " ", &ins->declaration.tgsm_structured.reg, 
true, ""); + shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.byte_stride, ""); + shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.structure_count, ""); + break; +@@ -1951,15 +2054,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + + case VKD3DSIH_DCL_UAV_RAW: + shader_dump_uav_flags(compiler, ins->flags); +- shader_addline(buffer, " "); +- shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); ++ shader_print_dst_param(compiler, " ", &ins->declaration.raw_resource.resource.reg, true, ""); + shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); + break; + + case VKD3DSIH_DCL_UAV_STRUCTURED: + shader_dump_uav_flags(compiler, ins->flags); +- shader_addline(buffer, " "); +- shader_dump_dst_param(compiler, &ins->declaration.structured_resource.resource.reg, true); ++ shader_print_dst_param(compiler, " ", &ins->declaration.structured_resource.resource.reg, true, ""); + shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); + shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); + break; +@@ -1994,7 +2095,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + + if (ins->resource_type != VKD3D_SHADER_RESOURCE_NONE) + { +- shader_addline(buffer, "_indexable("); ++ vkd3d_string_buffer_printf(buffer, "_indexable("); + if (ins->raw) + vkd3d_string_buffer_printf(buffer, "raw_"); + if (ins->structured) +@@ -2002,7 +2103,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + shader_dump_resource_type(compiler, ins->resource_type); + if (ins->resource_stride) + shader_print_uint_literal(compiler, ", stride=", ins->resource_stride, ""); +- shader_addline(buffer, ")"); ++ vkd3d_string_buffer_printf(buffer, ")"); + } + + if (vkd3d_shader_instruction_has_texel_offset(ins)) +@@ -2021,37 +2122,200 
@@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + for (i = 0; i < ins->dst_count; ++i) + { + shader_dump_ins_modifiers(compiler, &ins->dst[i]); +- shader_addline(buffer, !i ? " " : ", "); +- shader_dump_dst_param(compiler, &ins->dst[i], false); ++ shader_print_dst_param(compiler, !i ? " " : ", ", &ins->dst[i], false, ""); + } + + /* Other source tokens */ + for (i = ins->dst_count; i < (ins->dst_count + ins->src_count); ++i) + { +- shader_addline(buffer, !i ? " " : ", "); +- shader_dump_src_param(compiler, &ins->src[i - ins->dst_count]); ++ shader_print_src_param(compiler, !i ? " " : ", ", &ins->src[i - ins->dst_count], ""); + } + break; + } + +- shader_addline(buffer, "\n"); ++ vkd3d_string_buffer_printf(buffer, "\n"); ++} ++ ++static const char *get_sysval_semantic_name(enum vkd3d_shader_sysval_semantic semantic) ++{ ++ switch (semantic) ++ { ++ case VKD3D_SHADER_SV_NONE: return "NONE"; ++ case VKD3D_SHADER_SV_POSITION: return "POS"; ++ case VKD3D_SHADER_SV_CLIP_DISTANCE: return "CLIPDST"; ++ case VKD3D_SHADER_SV_CULL_DISTANCE: return "CULLDST"; ++ case VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX: return "RTINDEX"; ++ case VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX: return "VPINDEX"; ++ case VKD3D_SHADER_SV_VERTEX_ID: return "VERTID"; ++ case VKD3D_SHADER_SV_PRIMITIVE_ID: return "PRIMID"; ++ case VKD3D_SHADER_SV_INSTANCE_ID: return "INSTID"; ++ case VKD3D_SHADER_SV_IS_FRONT_FACE: return "FFACE"; ++ case VKD3D_SHADER_SV_SAMPLE_INDEX: return "SAMPLE"; ++ case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: return "QUADEDGE"; ++ case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: return "QUADINT"; ++ case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: return "TRIEDGE"; ++ case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: return "TRIINT"; ++ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: return "LINEDET"; ++ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: return "LINEDEN"; ++ case VKD3D_SHADER_SV_TARGET: return "TARGET"; ++ case VKD3D_SHADER_SV_DEPTH: return "DEPTH"; ++ case 
VKD3D_SHADER_SV_COVERAGE: return "COVERAGE"; ++ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "DEPTHGE"; ++ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "DEPTHLE"; ++ case VKD3D_SHADER_SV_STENCIL_REF: return "STENCILREF"; ++ default: return "??"; ++ } ++} ++ ++static const char *get_component_type_name(enum vkd3d_shader_component_type type) ++{ ++ switch (type) ++ { ++ case VKD3D_SHADER_COMPONENT_VOID: return "void"; ++ case VKD3D_SHADER_COMPONENT_UINT: return "uint"; ++ case VKD3D_SHADER_COMPONENT_INT: return "int"; ++ case VKD3D_SHADER_COMPONENT_FLOAT: return "float"; ++ case VKD3D_SHADER_COMPONENT_BOOL: return "bool"; ++ case VKD3D_SHADER_COMPONENT_DOUBLE: return "double"; ++ case VKD3D_SHADER_COMPONENT_UINT64: return "uint64"; ++ default: return "??"; ++ } + } + +-enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, ++static const char *get_minimum_precision_name(enum vkd3d_shader_minimum_precision prec) ++{ ++ switch (prec) ++ { ++ case VKD3D_SHADER_MINIMUM_PRECISION_NONE: return "NONE"; ++ case VKD3D_SHADER_MINIMUM_PRECISION_FLOAT_16: return "FLOAT_16"; ++ case VKD3D_SHADER_MINIMUM_PRECISION_FIXED_8_2: return "FIXED_8_2"; ++ case VKD3D_SHADER_MINIMUM_PRECISION_INT_16: return "INT_16"; ++ case VKD3D_SHADER_MINIMUM_PRECISION_UINT_16: return "UINT_16"; ++ default: return "??"; ++ } ++} ++ ++static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic semantic) ++{ ++ switch (semantic) ++ { ++ case VKD3D_SHADER_SV_DEPTH: return "oDepth"; ++ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "oDepthGE"; ++ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "oDepthLE"; ++ /* SV_Coverage has name vCoverage when used as an input, ++ * but it doens't appear in the signature in that case. 
*/ ++ case VKD3D_SHADER_SV_COVERAGE: return "oMask"; ++ case VKD3D_SHADER_SV_STENCIL_REF: return "oStencilRef"; ++ default: return "??"; ++ } ++} ++ ++static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, ++ const char *name, const char *register_name, const struct shader_signature *signature) ++{ ++ struct vkd3d_string_buffer *buffer = &compiler->buffer; ++ unsigned int i; ++ ++ if (signature->element_count == 0) ++ return VKD3D_OK; ++ ++ vkd3d_string_buffer_printf(buffer, "%s%s%s\n", ++ compiler->colours.opcode, name, compiler->colours.reset); ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ struct signature_element *element = &signature->elements[i]; ++ ++ vkd3d_string_buffer_printf(buffer, "%s.param%s %s", compiler->colours.opcode, ++ compiler->colours.reset, element->semantic_name); ++ ++ if (element->semantic_index != 0) ++ vkd3d_string_buffer_printf(buffer, "%u", element->semantic_index); ++ ++ if (element->register_index != -1) ++ { ++ shader_print_write_mask(compiler, "", element->mask, ""); ++ vkd3d_string_buffer_printf(buffer, ", %s%s%d%s", compiler->colours.reg, ++ register_name, element->register_index, compiler->colours.reset); ++ shader_print_write_mask(compiler, "", element->used_mask, ""); ++ } ++ else ++ { ++ vkd3d_string_buffer_printf(buffer, ", %s%s%s", compiler->colours.reg, ++ get_semantic_register_name(element->sysval_semantic), compiler->colours.reset); ++ } ++ ++ if (!element->component_type && !element->sysval_semantic ++ && !element->min_precision && !element->stream_index) ++ goto done; ++ ++ vkd3d_string_buffer_printf(buffer, ", %s", ++ get_component_type_name(element->component_type)); ++ ++ if (!element->sysval_semantic && !element->min_precision && !element->stream_index) ++ goto done; ++ ++ vkd3d_string_buffer_printf(buffer, ", %s", ++ get_sysval_semantic_name(element->sysval_semantic)); ++ ++ if (!element->min_precision && !element->stream_index) ++ goto done; ++ ++ 
vkd3d_string_buffer_printf(buffer, ", %s", ++ get_minimum_precision_name(element->min_precision)); ++ ++ if (!element->stream_index) ++ goto done; ++ ++ vkd3d_string_buffer_printf(buffer, ", m%u", ++ element->stream_index); ++ ++ done: ++ vkd3d_string_buffer_printf(buffer, "\n"); ++ } ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler, ++ const struct vsir_program *program) ++{ ++ enum vkd3d_result ret; ++ ++ if ((ret = dump_signature(compiler, ".input", ++ program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v", ++ &program->input_signature)) < 0) ++ return ret; ++ ++ if ((ret = dump_signature(compiler, ".output", "o", ++ &program->output_signature)) < 0) ++ return ret; ++ ++ if ((ret = dump_signature(compiler, ".patch_constant", ++ program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o", ++ &program->patch_constant_signature)) < 0) ++ return ret; ++ ++ vkd3d_string_buffer_printf(&compiler->buffer, "%s.text%s\n", ++ compiler->colours.opcode, compiler->colours.reset); ++ ++ return VKD3D_OK; ++} ++ ++enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_code *out, enum vsir_asm_dialect dialect) ++ struct vkd3d_shader_code *out, enum vsir_asm_flags flags) + { + const struct vkd3d_shader_version *shader_version = &program->shader_version; + enum vkd3d_shader_compile_option_formatting_flags formatting; + struct vkd3d_d3d_asm_compiler compiler = + { +- .dialect = dialect, ++ .flags = flags, + }; + enum vkd3d_result result = VKD3D_OK; + struct vkd3d_string_buffer *buffer; + unsigned int indent, i, j; + const char *indent_str; +- void *code; + + static const struct vkd3d_d3d_asm_colours no_colours = + { +@@ -2064,6 +2328,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, + .swizzle = "", + .version = "", + .write_mask = "", ++ .label = "", + }; + static 
const struct vkd3d_d3d_asm_colours colours = + { +@@ -2076,6 +2341,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, + .swizzle = "\x1b[93m", + .version = "\x1b[36m", + .write_mask = "\x1b[93m", ++ .label = "\x1b[91m", + }; + + formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT +@@ -2109,6 +2375,17 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, + shader_get_type_prefix(shader_version->type), shader_version->major, + shader_version->minor, compiler.colours.reset); + ++ /* The signatures we emit only make sense for DXBC shaders. D3DBC ++ * doesn't even have an explicit concept of signature. */ ++ if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4) ++ { ++ if ((result = dump_signatures(&compiler, program)) < 0) ++ { ++ vkd3d_string_buffer_cleanup(buffer); ++ return result; ++ } ++ } ++ + indent = 0; + for (i = 0; i < program->instructions.count; ++i) + { +@@ -2124,6 +2401,14 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, + --indent; + break; + ++ case VKD3DSIH_LABEL: ++ case VKD3DSIH_HS_DECLS: ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ case VKD3DSIH_HS_FORK_PHASE: ++ case VKD3DSIH_HS_JOIN_PHASE: ++ indent = 0; ++ break; ++ + default: + break; + } +@@ -2142,6 +2427,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, + case VKD3DSIH_IFC: + case VKD3DSIH_LOOP: + case VKD3DSIH_SWITCH: ++ case VKD3DSIH_LABEL: + ++indent; + break; + +@@ -2150,18 +2436,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, + } + } + +- if ((code = vkd3d_malloc(buffer->content_size))) +- { +- memcpy(code, buffer->buffer, buffer->content_size); +- out->size = buffer->content_size; +- out->code = code; +- } +- else +- { +- result = VKD3D_ERROR_OUT_OF_MEMORY; +- } +- +- vkd3d_string_buffer_cleanup(buffer); ++ vkd3d_shader_code_from_string_buffer(out, buffer); + + return 
result; + } +@@ -2171,7 +2446,7 @@ void vkd3d_shader_trace(const struct vsir_program *program) + const char *p, *q, *end; + struct vkd3d_shader_code code; + +- if (vkd3d_dxbc_binary_to_text(program, NULL, &code, VSIR_ASM_VSIR) != VKD3D_OK) ++ if (d3d_asm_compile(program, NULL, &code, VSIR_ASM_FLAG_DUMP_TYPES) != VKD3D_OK) + return; + + end = (const char *)code.code + code.size; +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 27f5c810436..ace7694a59e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -541,9 +541,9 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp + struct signature_element *element; + + if (output) +- signature = &sm1->p.shader_desc.output_signature; ++ signature = &sm1->p.program.output_signature; + else +- signature = &sm1->p.shader_desc.input_signature; ++ signature = &sm1->p.program.input_signature; + + if ((element = find_signature_element(signature, name, index))) + { +@@ -581,9 +581,9 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, + struct signature_element *element; + + if (output) +- signature = &sm1->p.shader_desc.output_signature; ++ signature = &sm1->p.program.output_signature; + else +- signature = &sm1->p.shader_desc.input_signature; ++ signature = &sm1->p.program.input_signature; + + if (!(element = find_signature_element_by_register_index(signature, register_index))) + { +@@ -886,7 +886,6 @@ static void shader_sm1_destroy(struct vkd3d_shader_parser *parser) + struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); + + vsir_program_cleanup(&parser->program); +- free_shader_desc(&sm1->p.shader_desc); + vkd3d_free(sm1); + } + +@@ -1237,7 +1236,6 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; + const uint32_t *code = 
compile_info->source.code; + size_t code_size = compile_info->source.size; +- struct vkd3d_shader_desc *shader_desc; + struct vkd3d_shader_version version; + uint16_t shader_type; + size_t token_count; +@@ -1290,9 +1288,6 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, + if (!vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name, &version, &shader_sm1_parser_ops, + code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- shader_desc = &sm1->p.shader_desc; +- shader_desc->byte_code = code; +- shader_desc->byte_code_size = code_size; + sm1->ptr = sm1->start; + + return VKD3D_OK; +@@ -1363,7 +1358,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi + sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); + + if (!sm1->p.failed) +- ret = vsir_validate(&sm1->p); ++ ret = vkd3d_shader_parser_validate(&sm1->p); + + if (sm1->p.failed && ret >= 0) + ret = VKD3D_ERROR_INVALID_SHADER; +@@ -1499,47 +1494,68 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns + return D3DPS_VERSION(major, minor); + } + +-static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) ++D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) + { + switch (type->class) + { + case HLSL_CLASS_ARRAY: +- return sm1_class(type->e.array.type); ++ return hlsl_sm1_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3DXPC_MATRIX_COLUMNS; + else + return D3DXPC_MATRIX_ROWS; +- case HLSL_CLASS_OBJECT: +- return D3DXPC_OBJECT; + case HLSL_CLASS_SCALAR: + return D3DXPC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3DXPC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3DXPC_VECTOR; +- default: +- ERR("Invalid class %#x.\n", type->class); +- vkd3d_unreachable(); ++ case HLSL_CLASS_OBJECT: ++ case 
HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TEXTURE: ++ return D3DXPC_OBJECT; ++ case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VOID: ++ break; + } ++ ++ vkd3d_unreachable(); + } + +-static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) ++D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) + { +- switch (type->base_type) ++ switch (type->class) + { +- case HLSL_TYPE_BOOL: +- return D3DXPT_BOOL; +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- return D3DXPT_FLOAT; +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- return D3DXPT_INT; +- case HLSL_TYPE_PIXELSHADER: +- return D3DXPT_PIXELSHADER; +- case HLSL_TYPE_SAMPLER: ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_BOOL: ++ return D3DXPT_BOOL; ++ /* Actually double behaves differently depending on DLL version: ++ * For <= 36, it maps to D3DXPT_FLOAT. ++ * For 37-40, it maps to zero (D3DXPT_VOID). ++ * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* ++ * values are mostly compatible with D3DXPT_*). ++ * However, the latter two cases look like bugs, and a reasonable ++ * application certainly wouldn't know what to do with them. ++ * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. 
*/ ++ case HLSL_TYPE_DOUBLE: ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ return D3DXPT_FLOAT; ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ return D3DXPT_INT; ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ case HLSL_CLASS_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: +@@ -1557,9 +1573,8 @@ static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) + vkd3d_unreachable(); + } + break; +- case HLSL_TYPE_STRING: +- return D3DXPT_STRING; +- case HLSL_TYPE_TEXTURE: ++ ++ case HLSL_CLASS_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: +@@ -1577,13 +1592,34 @@ static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) + vkd3d_unreachable(); + } + break; +- case HLSL_TYPE_VERTEXSHADER: +- return D3DXPT_VERTEXSHADER; +- case HLSL_TYPE_VOID: +- return D3DXPT_VOID; +- default: ++ ++ case HLSL_CLASS_OBJECT: ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_PIXELSHADER: ++ return D3DXPT_PIXELSHADER; ++ case HLSL_TYPE_VERTEXSHADER: ++ return D3DXPT_VERTEXSHADER; ++ default: ++ vkd3d_unreachable(); ++ } + vkd3d_unreachable(); ++ ++ case HLSL_CLASS_ARRAY: ++ return hlsl_sm1_base_type(type->e.array.type); ++ ++ case HLSL_CLASS_STRUCT: ++ return D3DXPT_VOID; ++ ++ case HLSL_CLASS_STRING: ++ return D3DXPT_STRING; ++ ++ case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VOID: ++ break; + } ++ ++ vkd3d_unreachable(); + } + + static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +@@ -1620,7 +1656,7 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_typ + } + } + +- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); ++ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type))); + put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); +@@ 
-1977,16 +2013,13 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + { + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- /* Integers are internally represented as floats, so no change is necessary.*/ ++ case HLSL_TYPE_BOOL: ++ /* Integrals are internally represented as floats, so no change is necessary.*/ + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + break; + +- case HLSL_TYPE_BOOL: +- hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to float."); +- break; +- + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float."); + break; +@@ -2002,7 +2035,10 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- /* A compilation pass applies a FLOOR operation to casts to int, so no change is necessary. */ ++ /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not ++ * reach this case unless we are missing something. 
*/ ++ hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer."); ++ break; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); +@@ -2067,6 +2103,9 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + D3DDECLUSAGE usage; + bool ret; + ++ if ((!output && !var->last_read) || (output && !var->first_write)) ++ return; ++ + if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) + { + usage = 0; +@@ -2242,6 +2281,12 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + + assert(instr->reg.allocated); + ++ if (expr->op == HLSL_OP1_REINTERPRET) ++ { ++ write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); ++ return; ++ } ++ + if (expr->op == HLSL_OP1_CAST) + { + write_sm1_cast(ctx, buffer, instr); +@@ -2329,7 +2374,23 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + break; + ++ case HLSL_OP2_LOGIC_AND: ++ write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); ++ break; ++ ++ case HLSL_OP2_LOGIC_OR: ++ write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); ++ break; ++ ++ case HLSL_OP2_SLT: ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); ++ write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); ++ break; ++ + case HLSL_OP3_CMP: ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) ++ hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); + write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + +@@ -2488,7 +2549,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + + if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) + { +- FIXME("Matrix writemasks need to be 
lowered.\n"); ++ hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks.\n"); + return; + } + +@@ -2552,19 +2613,11 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + { + if (instr->data_type) + { +- if (instr->data_type->class == HLSL_CLASS_MATRIX) ++ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + { +- /* These need to be lowered. */ +- hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); +- continue; +- } +- else if (instr->data_type->class == HLSL_CLASS_OBJECT) +- { +- hlsl_fixme(ctx, &instr->loc, "Object copy."); ++ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); + break; + } +- +- assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); + } + + switch (instr->type) +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +index 37ebc73c099..8a1012d909b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +@@ -150,7 +150,7 @@ static const char *shader_get_string(const char *data, size_t data_size, size_t + } + + static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, +- const char *source_name, struct vkd3d_shader_dxbc_desc *desc) ++ const char *source_name, uint32_t flags, struct vkd3d_shader_dxbc_desc *desc) + { + const struct vkd3d_shader_location location = {.source_name = source_name}; + struct vkd3d_shader_dxbc_section_desc *sections, *section; +@@ -186,17 +186,20 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ + checksum[1] = read_u32(&ptr); + checksum[2] = read_u32(&ptr); + checksum[3] = read_u32(&ptr); +- vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); +- if (memcmp(checksum, calculated_checksum, sizeof(checksum))) +- { +- WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not 
match " +- "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", +- checksum[0], checksum[1], checksum[2], checksum[3], +- calculated_checksum[0], calculated_checksum[1], +- calculated_checksum[2], calculated_checksum[3]); +- vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, +- "Invalid DXBC checksum."); +- return VKD3D_ERROR_INVALID_ARGUMENT; ++ if (!(flags & VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM)) ++ { ++ vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); ++ if (memcmp(checksum, calculated_checksum, sizeof(checksum))) ++ { ++ WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " ++ "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", ++ checksum[0], checksum[1], checksum[2], checksum[3], ++ calculated_checksum[0], calculated_checksum[1], ++ calculated_checksum[2], calculated_checksum[3]); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, ++ "Invalid DXBC checksum."); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } + } + + version = read_u32(&ptr); +@@ -287,7 +290,7 @@ static int for_each_dxbc_section(const struct vkd3d_shader_code *dxbc, + unsigned int i; + int ret; + +- if ((ret = parse_dxbc(dxbc, message_context, source_name, &desc)) < 0) ++ if ((ret = parse_dxbc(dxbc, message_context, source_name, 0, &desc)) < 0) + return ret; + + for (i = 0; i < desc.section_count; ++i) +@@ -313,7 +316,7 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, + *messages = NULL; + vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); + +- ret = parse_dxbc(dxbc, &message_context, NULL, desc); ++ ret = parse_dxbc(dxbc, &message_context, NULL, flags, desc); + + vkd3d_shader_message_context_trace_messages(&message_context); + if (!vkd3d_shader_message_context_copy_messages(&message_context, messages) && ret >= 0) +@@ -485,7 +488,7 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, + static int 
shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, + struct vkd3d_shader_message_context *message_context, void *context) + { +- struct vkd3d_shader_desc *desc = context; ++ struct dxbc_shader_desc *desc = context; + int ret; + + switch (section->tag) +@@ -550,7 +553,7 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, + return VKD3D_OK; + } + +-void free_shader_desc(struct vkd3d_shader_desc *desc) ++void free_dxbc_shader_desc(struct dxbc_shader_desc *desc) + { + shader_signature_cleanup(&desc->input_signature); + shader_signature_cleanup(&desc->output_signature); +@@ -558,7 +561,7 @@ void free_shader_desc(struct vkd3d_shader_desc *desc) + } + + int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, +- struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) ++ struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc) + { + int ret; + +@@ -569,7 +572,7 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, + if (ret < 0) + { + WARN("Failed to parse shader, vkd3d result %d.\n", ret); +- free_shader_desc(desc); ++ free_dxbc_shader_desc(desc); + } + + return ret; +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 26a8a5c1cc3..220ba773887 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -342,6 +342,8 @@ enum dx_intrinsic_opcode + { + DX_LOAD_INPUT = 4, + DX_STORE_OUTPUT = 5, ++ DX_FABS = 6, ++ DX_SATURATE = 7, + DX_ISNAN = 8, + DX_ISINF = 9, + DX_ISFINITE = 10, +@@ -374,8 +376,15 @@ enum dx_intrinsic_opcode + DX_IMIN = 38, + DX_UMAX = 39, + DX_UMIN = 40, ++ DX_FMAD = 46, ++ DX_FMA = 47, ++ DX_IMAD = 48, ++ DX_UMAD = 49, + DX_IBFE = 51, + DX_UBFE = 52, ++ DX_DOT2 = 54, ++ DX_DOT3 = 55, ++ DX_DOT4 = 56, + DX_CREATE_HANDLE = 57, + DX_CBUFFER_LOAD_LEGACY = 59, + DX_SAMPLE = 60, +@@ -388,14 +397,29 @@ enum 
dx_intrinsic_opcode + DX_TEXTURE_STORE = 67, + DX_BUFFER_LOAD = 68, + DX_BUFFER_STORE = 69, ++ DX_BUFFER_UPDATE_COUNTER = 70, + DX_GET_DIMENSIONS = 72, ++ DX_TEXTURE_GATHER = 73, ++ DX_TEXTURE_GATHER_CMP = 74, ++ DX_TEX2DMS_GET_SAMPLE_POS = 75, ++ DX_RT_GET_SAMPLE_POS = 76, ++ DX_RT_GET_SAMPLE_COUNT = 77, + DX_ATOMIC_BINOP = 78, + DX_ATOMIC_CMP_XCHG = 79, ++ DX_BARRIER = 80, ++ DX_CALCULATE_LOD = 81, ++ DX_DISCARD = 82, + DX_DERIV_COARSEX = 83, + DX_DERIV_COARSEY = 84, + DX_DERIV_FINEX = 85, + DX_DERIV_FINEY = 86, ++ DX_THREAD_ID = 93, ++ DX_GROUP_ID = 94, ++ DX_THREAD_ID_IN_GROUP = 95, ++ DX_FLATTENED_THREAD_ID_IN_GROUP = 96, ++ DX_MAKE_DOUBLE = 101, + DX_SPLIT_DOUBLE = 102, ++ DX_PRIMITIVE_ID = 108, + DX_LEGACY_F32TOF16 = 130, + DX_LEGACY_F16TOF32 = 131, + DX_RAW_BUFFER_LOAD = 139, +@@ -449,6 +473,32 @@ enum dxil_predicate + ICMP_SLE = 41, + }; + ++enum dxil_rmw_code ++{ ++ RMW_XCHG = 0, ++ RMW_ADD = 1, ++ RMW_SUB = 2, ++ RMW_AND = 3, ++ RMW_NAND = 4, ++ RMW_OR = 5, ++ RMW_XOR = 6, ++ RMW_MAX = 7, ++ RMW_MIN = 8, ++ RMW_UMAX = 9, ++ RMW_UMIN = 10, ++}; ++ ++enum dxil_atomic_ordering ++{ ++ ORDERING_NOTATOMIC = 0, ++ ORDERING_UNORDERED = 1, ++ ORDERING_MONOTONIC = 2, ++ ORDERING_ACQUIRE = 3, ++ ORDERING_RELEASE = 4, ++ ORDERING_ACQREL = 5, ++ ORDERING_SEQCST = 6, ++}; ++ + enum dxil_atomic_binop_code + { + ATOMIC_BINOP_ADD, +@@ -463,6 +513,14 @@ enum dxil_atomic_binop_code + ATOMIC_BINOP_INVALID, + }; + ++enum dxil_sync_flags ++{ ++ SYNC_THREAD_GROUP = 0x1, ++ SYNC_GLOBAL_UAV = 0x2, ++ SYNC_THREAD_GROUP_UAV = 0x4, ++ SYNC_GROUP_SHARED_MEMORY = 0x8, ++}; ++ + struct sm6_pointer_info + { + const struct sm6_type *type; +@@ -541,6 +599,7 @@ struct sm6_value + { + const struct sm6_type *type; + enum sm6_value_type value_type; ++ unsigned int structure_stride; + bool is_undefined; + union + { +@@ -736,9 +795,11 @@ struct sm6_parser + size_t global_symbol_count; + + const char *entry_point; ++ const char *patch_constant_function; + + struct vkd3d_shader_dst_param 
*output_params; + struct vkd3d_shader_dst_param *input_params; ++ uint32_t io_regs_declared[(VKD3DSPR_COUNT + 0x1f) / 0x20]; + + struct sm6_function *functions; + size_t function_count; +@@ -753,6 +814,7 @@ struct sm6_parser + + unsigned int indexable_temp_count; + unsigned int icb_count; ++ unsigned int tgsm_count; + + struct sm6_value *values; + size_t value_count; +@@ -1876,6 +1938,25 @@ static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type + return NULL; + } + ++static const struct sm6_type *sm6_type_get_cmpxchg_result_struct(struct sm6_parser *sm6) ++{ ++ const struct sm6_type *type; ++ unsigned int i; ++ ++ for (i = 0; i < sm6->type_count; ++i) ++ { ++ type = &sm6->types[i]; ++ if (sm6_type_is_struct(type) && type->u.struc->elem_count == 2 ++ && sm6_type_is_i32(type->u.struc->elem_types[0]) ++ && sm6_type_is_bool(type->u.struc->elem_types[1])) ++ { ++ return type; ++ } ++ } ++ ++ return NULL; ++} ++ + /* Call for aggregate types only. */ + static const struct sm6_type *sm6_type_get_element_type_at_index(const struct sm6_type *type, uint64_t elem_idx) + { +@@ -2110,6 +2191,15 @@ static inline bool sm6_value_is_undef(const struct sm6_value *value) + return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; + } + ++static bool sm6_value_vector_is_constant_or_undef(const struct sm6_value **values, unsigned int count) ++{ ++ unsigned int i; ++ for (i = 0; i < count; ++i) ++ if (!sm6_value_is_constant(values[i]) && !sm6_value_is_undef(values[i])) ++ return false; ++ return true; ++} ++ + static bool sm6_value_is_icb(const struct sm6_value *value) + { + return value->value_type == VALUE_TYPE_ICB; +@@ -2199,6 +2289,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type + return VKD3D_DATA_BOOL; + case 8: + return VKD3D_DATA_UINT8; ++ case 16: ++ return VKD3D_DATA_UINT16; + case 32: + return VKD3D_DATA_UINT; + case 64: +@@ -2212,6 +2304,8 @@ static enum vkd3d_data_type 
vkd3d_data_type_from_sm6_type(const struct sm6_type + { + switch (type->u.width) + { ++ case 16: ++ return VKD3D_DATA_HALF; + case 32: + return VKD3D_DATA_FLOAT; + case 64: +@@ -2252,6 +2346,12 @@ static void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const st + register_init_ssa_vector(reg, sm6_type_get_scalar_type(type, 0), 1, value, sm6); + } + ++static void register_make_constant_uint(struct vkd3d_shader_register *reg, unsigned int value) ++{ ++ vsir_register_init(reg, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); ++ reg->u.immconst_u32[0] = value; ++} ++ + static void dst_param_init(struct vkd3d_shader_dst_param *param) + { + param->write_mask = VKD3DSP_WRITEMASK_0; +@@ -2301,6 +2401,12 @@ static void src_param_init_scalar(struct vkd3d_shader_src_param *param, unsigned + param->modifiers = VKD3DSPSM_NONE; + } + ++static void src_param_init_vector(struct vkd3d_shader_src_param *param, unsigned int component_count) ++{ ++ param->swizzle = VKD3D_SHADER_NO_SWIZZLE & ((1ull << VKD3D_SHADER_SWIZZLE_SHIFT(component_count)) - 1); ++ param->modifiers = VKD3DSPSM_NONE; ++} ++ + static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src) + { + src_param_init(param); +@@ -2315,6 +2421,12 @@ static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param, + param->reg = *reg; + } + ++static void src_param_make_constant_uint(struct vkd3d_shader_src_param *param, unsigned int value) ++{ ++ src_param_init(param); ++ register_make_constant_uint(¶m->reg, value); ++} ++ + static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address, + struct sm6_parser *sm6) + { +@@ -2336,14 +2448,18 @@ static void register_index_address_init(struct vkd3d_shader_register_index *idx, + } + } + +-static void instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) ++static bool instruction_dst_param_init_ssa_scalar(struct 
vkd3d_shader_instruction *ins, struct sm6_parser *sm6) + { +- struct vkd3d_shader_dst_param *param = instruction_dst_params_alloc(ins, 1, sm6); + struct sm6_value *dst = sm6_parser_get_current_value(sm6); ++ struct vkd3d_shader_dst_param *param; ++ ++ if (!(param = instruction_dst_params_alloc(ins, 1, sm6))) ++ return false; + + dst_param_init_ssa_scalar(param, dst->type, dst, sm6); + param->write_mask = VKD3DSP_WRITEMASK_0; + dst->u.reg = param->reg; ++ return true; + } + + static void instruction_dst_param_init_ssa_vector(struct vkd3d_shader_instruction *ins, +@@ -2482,6 +2598,26 @@ static bool sm6_value_validate_is_texture_handle(const struct sm6_value *value, + return true; + } + ++static bool sm6_value_validate_is_texture_2dms_handle(const struct sm6_value *value, enum dx_intrinsic_opcode op, ++ struct sm6_parser *sm6) ++{ ++ enum dxil_resource_kind kind; ++ ++ if (!sm6_value_validate_is_handle(value, sm6)) ++ return false; ++ ++ kind = value->u.handle.d->kind; ++ if (!resource_kind_is_multisampled(kind)) ++ { ++ WARN("Resource kind %u for op %u is not a 2DMS texture.\n", kind, op); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCE_HANDLE, ++ "Resource kind %u for texture operation %u is not a 2DMS texture.", kind, op); ++ return false; ++ } ++ ++ return true; ++} ++ + static bool sm6_value_validate_is_sampler_handle(const struct sm6_value *value, enum dx_intrinsic_opcode op, + struct sm6_parser *sm6) + { +@@ -2539,6 +2675,30 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6 + return true; + } + ++static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, struct sm6_parser *sm6) ++{ ++ if (!sm6_type_is_pointer(value->type) || !sm6_type_is_i32(value->type->u.pointer.type)) ++ { ++ WARN("Operand result type %u is not a pointer to i32.\n", value->type->class); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "An int32 pointer operand passed to a 
DXIL instruction is not an int32 pointer."); ++ return false; ++ } ++ return true; ++} ++ ++static bool sm6_value_validate_is_i32(const struct sm6_value *value, struct sm6_parser *sm6) ++{ ++ if (!sm6_type_is_i32(value->type)) ++ { ++ WARN("Operand result type %u is not i32.\n", value->type->class); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "An int32 operand passed to a DXIL instruction is not an int32."); ++ return false; ++ } ++ return true; ++} ++ + static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) + { + if (idx < sm6->value_count) +@@ -2686,7 +2846,7 @@ static inline uint64_t decode_rotated_signed_value(uint64_t value) + return value << 63; + } + +-static inline float bitcast_uint64_to_float(uint64_t value) ++static float bitcast_uint_to_float(unsigned int value) + { + union + { +@@ -2710,6 +2870,23 @@ static inline double bitcast_uint64_to_double(uint64_t value) + return u.double_value; + } + ++static float register_get_float_value(const struct vkd3d_shader_register *reg) ++{ ++ if (!register_is_constant(reg) || !data_type_is_floating_point(reg->data_type)) ++ return 0.0; ++ ++ if (reg->dimension == VSIR_DIMENSION_VEC4) ++ WARN("Returning vec4.x.\n"); ++ ++ if (reg->type == VKD3DSPR_IMMCONST64) ++ { ++ WARN("Truncating double to float.\n"); ++ return bitcast_uint64_to_double(reg->u.immconst_u64[0]); ++ } ++ ++ return bitcast_uint_to_float(reg->u.immconst_u32[0]); ++} ++ + static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, const struct sm6_type *type, + const uint64_t *operands, struct sm6_parser *sm6) + { +@@ -2782,6 +2959,122 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co + return VKD3D_OK; + } + ++static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, const struct dxil_record *record, ++ struct sm6_value *dst) ++{ ++ const struct sm6_type *elem_type, *pointee_type, *gep_type, 
*ptr_type; ++ struct sm6_value *operands[3]; ++ unsigned int i, j, offset; ++ uint64_t value; ++ ++ i = 0; ++ pointee_type = (record->operand_count & 1) ? sm6_parser_get_type(sm6, record->operands[i++]) : NULL; ++ ++ if (!dxil_record_validate_operand_count(record, i + 6, i + 6, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ for (j = 0; i < record->operand_count; i += 2, ++j) ++ { ++ if (!(elem_type = sm6_parser_get_type(sm6, record->operands[i]))) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if ((value = record->operands[i + 1]) >= sm6->cur_max_value) ++ { ++ WARN("Invalid value index %"PRIu64".\n", value); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid value index %"PRIu64".", value); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ else if (value == sm6->value_count) ++ { ++ WARN("Invalid value self-reference at %"PRIu64".\n", value); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid value self-reference for a constexpr GEP."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ operands[j] = &sm6->values[value]; ++ if (value > sm6->value_count) ++ { ++ operands[j]->type = elem_type; ++ } ++ else if (operands[j]->type != elem_type) ++ { ++ WARN("Type mismatch.\n"); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, ++ "Type mismatch in constexpr GEP elements."); ++ } ++ } ++ ++ if (operands[0]->u.reg.idx_count > 1) ++ { ++ WARN("Unsupported stacked GEP.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "A GEP instruction on the result of a previous GEP is unsupported."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!sm6_value_is_constant_zero(operands[1])) ++ { ++ WARN("Expected constant zero.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "The pointer dereference index for a constexpr GEP instruction is not constant zero."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ 
if (!sm6_value_is_constant(operands[2]) || !sm6_type_is_integer(operands[2]->type)) ++ { ++ WARN("Element index is not constant int.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "A constexpr GEP element index is not a constant integer."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ dst->structure_stride = operands[0]->structure_stride; ++ ++ ptr_type = operands[0]->type; ++ if (!sm6_type_is_pointer(ptr_type)) ++ { ++ WARN("Constexpr GEP base value is not a pointer.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "A constexpr GEP base value is not a pointer."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!pointee_type) ++ { ++ pointee_type = ptr_type->u.pointer.type; ++ } ++ else if (pointee_type != ptr_type->u.pointer.type) ++ { ++ WARN("Explicit pointee type mismatch.\n"); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, ++ "Explicit pointee type for constexpr GEP does not match the element type."); ++ } ++ ++ offset = sm6_value_get_constant_uint(operands[2]); ++ if (!(gep_type = sm6_type_get_element_type_at_index(pointee_type, offset))) ++ { ++ WARN("Failed to get element type.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Failed to get the element type of a constexpr GEP."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!(dst->type = sm6_type_get_pointer_to_type(gep_type, ptr_type->u.pointer.addr_space, sm6))) ++ { ++ WARN("Failed to get pointer type for type %u.\n", gep_type->class); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, ++ "Module does not define a pointer type for a constexpr GEP result."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ dst->u.reg = operands[0]->u.reg; ++ dst->u.reg.idx[1].offset = offset; ++ dst->u.reg.idx[1].is_in_bounds = record->code == CST_CODE_CE_INBOUNDS_GEP; ++ dst->u.reg.idx_count = 2; ++ ++ return VKD3D_OK; ++} ++ + 
static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) + { + enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; +@@ -2876,9 +3169,9 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const + } + + if (type->u.width == 16) +- FIXME("Half float type is not supported yet.\n"); ++ dst->u.reg.u.immconst_u32[0] = record->operands[0]; + else if (type->u.width == 32) +- dst->u.reg.u.immconst_f32[0] = bitcast_uint64_to_float(record->operands[0]); ++ dst->u.reg.u.immconst_f32[0] = bitcast_uint_to_float(record->operands[0]); + else if (type->u.width == 64) + dst->u.reg.u.immconst_f64[0] = bitcast_uint64_to_double(record->operands[0]); + else +@@ -2902,6 +3195,12 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const + + break; + ++ case CST_CODE_CE_GEP: ++ case CST_CODE_CE_INBOUNDS_GEP: ++ if ((ret = sm6_parser_init_constexpr_gep(sm6, record, dst)) < 0) ++ return ret; ++ break; ++ + case CST_CODE_UNDEF: + dxil_record_validate_operand_max_count(record, 0, sm6); + dst->u.reg.type = VKD3DSPR_UNDEF; +@@ -2911,6 +3210,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const + + default: + FIXME("Unhandled constant code %u.\n", record->code); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Constant code %u is unhandled.", record->code); + dst->u.reg.type = VKD3DSPR_UNDEF; + break; + } +@@ -2994,6 +3295,58 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru + register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx); + } + ++static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6_type *elem_type, ++ unsigned int alignment, unsigned int init, struct sm6_value *dst) ++{ ++ enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); ++ struct vkd3d_shader_instruction *ins; ++ unsigned 
int byte_count; ++ ++ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_RAW); ++ dst_param_init(&ins->declaration.tgsm_raw.reg); ++ register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); ++ dst->u.reg = ins->declaration.tgsm_raw.reg.reg; ++ dst->structure_stride = 0; ++ ins->declaration.tgsm_raw.alignment = alignment; ++ byte_count = elem_type->u.width / 8u; ++ if (byte_count != 4) ++ { ++ FIXME("Unsupported byte count %u.\n", byte_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Raw TGSM byte count %u is not supported.", byte_count); ++ } ++ ins->declaration.tgsm_raw.byte_count = byte_count; ++ /* The initialiser value index will be resolved later when forward references can be handled. */ ++ ins->flags = init; ++} ++ ++static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const struct sm6_type *elem_type, ++ unsigned int count, unsigned int alignment, unsigned int init, struct sm6_value *dst) ++{ ++ enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); ++ struct vkd3d_shader_instruction *ins; ++ unsigned int structure_stride; ++ ++ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_STRUCTURED); ++ dst_param_init(&ins->declaration.tgsm_structured.reg); ++ register_init_with_id(&ins->declaration.tgsm_structured.reg.reg, VKD3DSPR_GROUPSHAREDMEM, ++ data_type, sm6->tgsm_count++); ++ dst->u.reg = ins->declaration.tgsm_structured.reg.reg; ++ structure_stride = elem_type->u.width / 8u; ++ if (structure_stride != 4) ++ { ++ FIXME("Unsupported structure stride %u.\n", structure_stride); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Structured TGSM byte stride %u is not supported.", structure_stride); ++ } ++ dst->structure_stride = structure_stride; ++ ins->declaration.tgsm_structured.alignment = alignment; ++ ins->declaration.tgsm_structured.byte_stride = structure_stride; ++ 
ins->declaration.tgsm_structured.structure_count = count; ++ /* The initialiser value index will be resolved later when forward references can be handled. */ ++ ins->flags = init; ++} ++ + static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record) + { + const struct sm6_type *type, *scalar_type; +@@ -3119,10 +3472,17 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ + } + else if (address_space == ADDRESS_SPACE_GROUPSHARED) + { +- FIXME("Unsupported TGSM.\n"); +- vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, +- "TGSM global variables are not supported."); +- return false; ++ if (!sm6_type_is_numeric(scalar_type)) ++ { ++ WARN("Unsupported type class %u.\n", scalar_type->class); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "TGSM variables of type class %u are not supported.", scalar_type->class); ++ return false; ++ } ++ if (count == 1) ++ sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); ++ else ++ sm6_parser_declare_tgsm_structured(sm6, scalar_type, count, alignment, init, dst); + } + else + { +@@ -3158,6 +3518,38 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init + return NULL; + } + ++static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm6) ++{ ++ const struct sm6_value *value; ++ ++ if (!index) ++ return false; ++ ++ --index; ++ if (!(value = sm6_parser_get_value_safe(sm6, index)) ++ || (!sm6_value_is_icb(value) && !sm6_value_is_constant(value) && !sm6_value_is_undef(value))) ++ { ++ WARN("Invalid initialiser index %zu.\n", index); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "TGSM initialiser value index %zu is invalid.", index); ++ return false; ++ } ++ else if ((sm6_value_is_icb(value) && value->u.icb->is_null) || sm6_value_is_constant_zero(value)) ++ { ++ return true; ++ } ++ else if 
(sm6_value_is_undef(value)) ++ { ++ /* In VSIR, initialisation with undefined values of objects is implied, not explicit. */ ++ return false; ++ } ++ ++ FIXME("Non-zero initialisers are not supported.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Non-zero TGSM initialisers are not supported."); ++ return false; ++} ++ + static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) + { + size_t i, count, base_value_idx = sm6->value_count; +@@ -3231,6 +3623,16 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) + { + ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); + } ++ else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) ++ { ++ ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); ++ ins->flags = 0; ++ } ++ else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) ++ { ++ ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); ++ ins->flags = 0; ++ } + } + for (i = base_value_idx; i < sm6->value_count; ++i) + { +@@ -3270,6 +3672,22 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par + src_param_init_from_value(&src_params[i], operands[i]); + } + ++static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( ++ enum vkd3d_shader_sysval_semantic sysval_semantic) ++{ ++ switch (sysval_semantic) ++ { ++ case VKD3D_SHADER_SV_DEPTH: ++ return VKD3DSPR_DEPTHOUT; ++ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: ++ return VKD3DSPR_DEPTHOUTGE; ++ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: ++ return VKD3DSPR_DEPTHOUTLE; ++ default: ++ return VKD3DSPR_INVALID; ++ } ++} ++ + static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, + enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) + { +@@ -3282,6 +3700,13 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const 
struct shade + e = &s->elements[i]; + + param = ¶ms[i]; ++ ++ if (e->register_index == UINT_MAX) ++ { ++ dst_param_io_init(param, e, register_type_from_dxil_semantic_kind(e->sysval_semantic)); ++ continue; ++ } ++ + dst_param_io_init(param, e, reg_type); + count = 0; + if (e->register_count > 1) +@@ -3350,6 +3775,9 @@ struct function_emission_state + unsigned int temp_idx; + }; + ++static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, ++ unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg); ++ + static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record, + struct vkd3d_shader_instruction *ins, struct sm6_value *dst) + { +@@ -3425,30 +3853,153 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec + sm6_parser_declare_indexable_temp(sm6, elem_type, type[0]->u.array.count, alignment, true, 0, ins, dst); + } + +-static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, +- const struct sm6_type *type_b, struct sm6_parser *sm6) ++static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code) + { +- bool is_int = sm6_type_is_bool_i16_i32_i64(type_a); +- bool is_bool = sm6_type_is_bool(type_a); +- enum vkd3d_shader_opcode op; +- bool is_valid; +- +- if (!is_int && !sm6_type_is_floating_point(type_a)) +- { +- WARN("Argument type %u is not bool, int16/32/64 or floating point.\n", type_a->class); +- vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, +- "An argument to a binary operation is not bool, int16/32/64 or floating point."); +- return VKD3DSIH_INVALID; +- } +- if (type_a != type_b) ++ switch (code) + { +- WARN("Type mismatch, type %u width %u vs type %u width %u.\n", type_a->class, +- type_a->u.width, type_b->class, type_b->u.width); +- vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, +- "Type 
mismatch in binary operation arguments."); ++ case RMW_ADD: ++ return VKD3DSIH_IMM_ATOMIC_IADD; ++ case RMW_AND: ++ return VKD3DSIH_IMM_ATOMIC_AND; ++ case RMW_MAX: ++ return VKD3DSIH_IMM_ATOMIC_IMAX; ++ case RMW_MIN: ++ return VKD3DSIH_IMM_ATOMIC_IMIN; ++ case RMW_OR: ++ return VKD3DSIH_IMM_ATOMIC_OR; ++ case RMW_UMAX: ++ return VKD3DSIH_IMM_ATOMIC_UMAX; ++ case RMW_UMIN: ++ return VKD3DSIH_IMM_ATOMIC_UMIN; ++ case RMW_XCHG: ++ return VKD3DSIH_IMM_ATOMIC_EXCH; ++ case RMW_XOR: ++ return VKD3DSIH_IMM_ATOMIC_XOR; ++ default: ++ /* DXIL currently doesn't use SUB and NAND. */ ++ return VKD3DSIH_INVALID; + } ++} + +- switch (code) ++static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record, ++ struct function_emission_state *state, struct sm6_value *dst) ++{ ++ struct vkd3d_shader_register coord, const_offset, const_zero; ++ const struct vkd3d_shader_register *regs[2]; ++ struct vkd3d_shader_dst_param *dst_params; ++ struct vkd3d_shader_src_param *src_params; ++ struct vkd3d_shader_instruction *ins; ++ const struct sm6_value *ptr, *src; ++ enum vkd3d_shader_opcode op; ++ unsigned int i = 0; ++ bool is_volatile; ++ uint64_t code; ++ ++ if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) ++ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) ++ return; ++ ++ if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) ++ { ++ WARN("Register is not groupshared.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "The destination register for an atomicrmw instruction is not groupshared memory."); ++ return; ++ } ++ ++ dst->type = ptr->type->u.pointer.type; ++ ++ if (!(src = sm6_parser_get_value_by_ref(sm6, record, dst->type, &i))) ++ return; ++ ++ if (!dxil_record_validate_operand_count(record, i + 4, i + 4, sm6)) ++ return; ++ ++ if ((op = map_dx_atomicrmw_op(code = record->operands[i++])) == VKD3DSIH_INVALID) ++ { ++ FIXME("Unhandled atomicrmw op %"PRIu64".\n", code); ++ 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Operation %"PRIu64" for an atomicrmw instruction is unhandled.", code); ++ return; ++ } ++ ++ is_volatile = record->operands[i++]; ++ ++ /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ ++ if ((code = record->operands[i++]) != ORDERING_SEQCST) ++ FIXME("Unhandled atomic ordering %"PRIu64".\n", code); ++ ++ if ((code = record->operands[i]) != 1) ++ WARN("Ignoring synchronisation scope %"PRIu64".\n", code); ++ ++ if (ptr->structure_stride) ++ { ++ if (ptr->u.reg.idx[1].rel_addr) ++ { ++ regs[0] = &ptr->u.reg.idx[1].rel_addr->reg; ++ } ++ else ++ { ++ register_make_constant_uint(&const_offset, ptr->u.reg.idx[1].offset); ++ regs[0] = &const_offset; ++ } ++ register_make_constant_uint(&const_zero, 0); ++ regs[1] = &const_zero; ++ if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord)) ++ return; ++ } ++ ++ ins = state->ins; ++ vsir_instruction_init(ins, &sm6->p.location, op); ++ ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; ++ ++ if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) ++ return; ++ if (ptr->structure_stride) ++ src_param_init_vector_from_reg(&src_params[0], &coord); ++ else ++ src_param_make_constant_uint(&src_params[0], 0); ++ src_param_init_from_value(&src_params[1], src); ++ ++ dst_params = instruction_dst_params_alloc(ins, 2, sm6); ++ register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); ++ dst_param_init(&dst_params[0]); ++ ++ dst_params[1].reg = ptr->u.reg; ++ /* The groupshared register has data type UAV when accessed. 
*/ ++ dst_params[1].reg.data_type = VKD3D_DATA_UAV; ++ dst_params[1].reg.idx[1].rel_addr = NULL; ++ dst_params[1].reg.idx[1].offset = ~0u; ++ dst_params[1].reg.idx_count = 1; ++ dst_param_init(&dst_params[1]); ++ ++ dst->u.reg = dst_params[0].reg; ++} ++ ++static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, ++ const struct sm6_type *type_b, struct sm6_parser *sm6) ++{ ++ bool is_int = sm6_type_is_bool_i16_i32_i64(type_a); ++ bool is_bool = sm6_type_is_bool(type_a); ++ enum vkd3d_shader_opcode op; ++ bool is_valid; ++ ++ if (!is_int && !sm6_type_is_floating_point(type_a)) ++ { ++ WARN("Argument type %u is not bool, int16/32/64 or floating point.\n", type_a->class); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "An argument to a binary operation is not bool, int16/32/64 or floating point."); ++ return VKD3DSIH_INVALID; ++ } ++ if (type_a != type_b) ++ { ++ WARN("Type mismatch, type %u width %u vs type %u width %u.\n", type_a->class, ++ type_a->u.width, type_b->class, type_b->u.width); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, ++ "Type mismatch in binary operation arguments."); ++ } ++ ++ switch (code) + { + case BINOP_ADD: + case BINOP_SUB: +@@ -3974,6 +4525,98 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr + dst->u.reg = dst_params[0].reg; + } + ++static void sm6_parser_emit_dx_barrier(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ enum dxil_sync_flags flags; ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SYNC); ++ flags = sm6_value_get_constant_uint(operands[0]); ++ ins->flags = flags & (SYNC_THREAD_GROUP | SYNC_THREAD_GROUP_UAV); ++ if (flags & SYNC_GLOBAL_UAV) ++ ins->flags |= VKD3DSSF_GLOBAL_UAV; ++ if (flags & SYNC_GROUP_SHARED_MEMORY) ++ ins->flags |= 
VKD3DSSF_GROUP_SHARED_MEMORY; ++ if (flags &= ~(SYNC_THREAD_GROUP | SYNC_GLOBAL_UAV | SYNC_THREAD_GROUP_UAV | SYNC_GROUP_SHARED_MEMORY)) ++ { ++ FIXME("Unhandled flags %#x.\n", flags); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Barrier flags %#x are unhandled.", flags); ++ } ++} ++ ++static void sm6_parser_emit_dx_buffer_update_counter(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_params; ++ const struct sm6_value *resource; ++ unsigned int i; ++ int8_t inc; ++ ++ resource = operands[0]; ++ if (!sm6_value_validate_is_handle(resource, sm6)) ++ return; ++ ++ if (!sm6_value_is_constant(operands[1])) ++ { ++ FIXME("Unsupported dynamic update operand.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "A dynamic update value for a UAV counter operation is not supported."); ++ return; ++ } ++ i = sm6_value_get_constant_uint(operands[1]); ++ if (i != 1 && i != 255) ++ { ++ WARN("Unexpected update value %#x.\n", i); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Update value %#x for a UAV counter operation is not supported.", i); ++ } ++ inc = i; ++ ++ vsir_instruction_init(ins, &sm6->p.location, (inc < 0) ? 
VKD3DSIH_IMM_ATOMIC_CONSUME : VKD3DSIH_IMM_ATOMIC_ALLOC); ++ if (!(src_params = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ src_param_init_vector_from_reg(&src_params[0], &resource->u.handle.reg); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ ++static void sm6_parser_emit_dx_calculate_lod(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ const struct sm6_value *resource, *sampler; ++ struct vkd3d_shader_src_param *src_params; ++ struct vkd3d_shader_instruction *ins; ++ struct vkd3d_shader_register coord; ++ unsigned int clamp; ++ ++ resource = operands[0]; ++ sampler = operands[1]; ++ if (!sm6_value_validate_is_texture_handle(resource, op, sm6) ++ || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) ++ { ++ return; ++ } ++ ++ if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], 3, NULL, state, &coord)) ++ return; ++ ++ clamp = sm6_value_get_constant_uint(operands[5]); ++ ++ ins = state->ins; ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LOD); ++ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) ++ return; ++ src_param_init_vector_from_reg(&src_params[0], &coord); ++ src_params[1].reg = resource->u.handle.reg; ++ src_param_init_scalar(&src_params[1], !clamp); ++ src_param_init_vector_from_reg(&src_params[2], &sampler->u.handle.reg); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ + static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -4004,6 +4647,38 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr + instruction_dst_param_init_ssa_vector(ins, sm6_type_max_vector_size(type), sm6); + } + ++static void sm6_parser_dcl_register_builtin(struct sm6_parser *sm6, enum vkd3d_shader_opcode handler_idx, ++ enum vkd3d_shader_register_type reg_type, 
enum vkd3d_data_type data_type, unsigned int component_count) ++{ ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!bitmap_is_set(sm6->io_regs_declared, reg_type)) ++ { ++ bitmap_set(sm6->io_regs_declared, reg_type); ++ ins = sm6_parser_add_instruction(sm6, handler_idx); ++ dst_param = &ins->declaration.dst; ++ vsir_register_init(&dst_param->reg, reg_type, data_type, 0); ++ dst_param_init_vector(dst_param, component_count); ++ } ++} ++ ++static void sm6_parser_emit_dx_input_register_mov(struct sm6_parser *sm6, ++ struct vkd3d_shader_instruction *ins, enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type) ++{ ++ struct vkd3d_shader_src_param *src_param; ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); ++ ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_INPUT, reg_type, data_type, 1); ++ vsir_register_init(&src_param->reg, reg_type, data_type, 0); ++ src_param_init(src_param); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ + static const struct sm6_descriptor_info *sm6_parser_get_descriptor(struct sm6_parser *sm6, + enum vkd3d_shader_descriptor_type type, unsigned int id, const struct sm6_value *address) + { +@@ -4065,6 +4740,149 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int + ins->handler_idx = VKD3DSIH_NOP; + } + ++static void sm6_parser_emit_dx_discard(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_DISCARD); ++ ++ if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ src_param_init_from_value(src_param, operands[0]); ++} ++ ++static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum 
dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_src_param *src_params; ++ struct vkd3d_shader_instruction *ins; ++ struct vkd3d_shader_register regs[2]; ++ enum vkd3d_shader_opcode handler_idx; ++ unsigned int component_count; ++ ++ switch (op) ++ { ++ case DX_DOT2: ++ handler_idx = VKD3DSIH_DP2; ++ component_count = 2; ++ break; ++ case DX_DOT3: ++ handler_idx = VKD3DSIH_DP3; ++ component_count = 3; ++ break; ++ case DX_DOT4: ++ handler_idx = VKD3DSIH_DP4; ++ component_count = 4; ++ break; ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ if (!sm6_parser_emit_composite_construct(sm6, &operands[0], component_count, state, ®s[0])) ++ return; ++ if (!sm6_parser_emit_composite_construct(sm6, &operands[component_count], component_count, state, ®s[1])) ++ return; ++ ++ ins = state->ins; ++ vsir_instruction_init(ins, &sm6->p.location, handler_idx); ++ if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) ++ return; ++ src_param_init_vector_from_reg(&src_params[0], ®s[0]); ++ src_param_init_vector_from_reg(&src_params[1], ®s[1]); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ ++static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ src_param_init_from_value(src_param, operands[0]); ++ src_param->modifiers = VKD3DSPSM_ABS; ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ ++static void sm6_parser_emit_dx_compute_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ unsigned int component_count = 3, component_idx = 0; ++ 
struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ enum vkd3d_shader_register_type reg_type; ++ ++ switch (op) ++ { ++ case DX_THREAD_ID: ++ reg_type = VKD3DSPR_THREADID; ++ break; ++ case DX_GROUP_ID: ++ reg_type = VKD3DSPR_THREADGROUPID; ++ break; ++ case DX_THREAD_ID_IN_GROUP: ++ reg_type = VKD3DSPR_LOCALTHREADID; ++ break; ++ case DX_FLATTENED_THREAD_ID_IN_GROUP: ++ reg_type = VKD3DSPR_LOCALTHREADINDEX; ++ component_count = 1; ++ break; ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_INPUT, reg_type, VKD3D_DATA_UINT, component_count); ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ vsir_register_init(&src_param->reg, reg_type, VKD3D_DATA_UINT, 0); ++ if (component_count > 1) ++ { ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; ++ component_idx = sm6_value_get_constant_uint(operands[0]); ++ } ++ src_param_init_scalar(src_param, component_idx); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ ++static enum vkd3d_shader_opcode sm6_dx_map_ma_op(enum dx_intrinsic_opcode op, const struct sm6_type *type) ++{ ++ switch (op) ++ { ++ case DX_FMA: ++ return VKD3DSIH_DFMA; ++ case DX_FMAD: ++ return VKD3DSIH_MAD; ++ case DX_IMAD: ++ case DX_UMAD: ++ return VKD3DSIH_IMAD; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void sm6_parser_emit_dx_ma(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_params; ++ unsigned int i; ++ ++ vsir_instruction_init(ins, &sm6->p.location, sm6_dx_map_ma_op(op, operands[0]->type)); ++ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) ++ return; ++ for (i = 0; i < 3; ++i) ++ src_param_init_from_value(&src_params[i], operands[i]); ++ ++ 
instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ + static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -4182,7 +5000,7 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + +- signature = &sm6->p.shader_desc.input_signature; ++ signature = &sm6->p.program.input_signature; + if (row_index >= signature->element_count) + { + WARN("Invalid row index %u.\n", row_index); +@@ -4202,6 +5020,32 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin + instruction_dst_param_init_ssa_scalar(ins, sm6); + } + ++static void sm6_parser_emit_dx_make_double(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_src_param *src_params; ++ struct vkd3d_shader_instruction *ins; ++ struct vkd3d_shader_register reg; ++ ++ if (!sm6_parser_emit_composite_construct(sm6, &operands[0], 2, state, ®)) ++ return; ++ ++ ins = state->ins; ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); ++ if (!(src_params = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ src_params[0].reg = reg; ++ src_param_init_vector(&src_params[0], 2); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ ++static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_PRIMID, VKD3D_DATA_UINT); ++} ++ + static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -4424,6 +5268,59 @@ static void sm6_parser_emit_dx_buffer_store(struct 
sm6_parser *sm6, enum dx_intr + dst_param->reg = resource->u.handle.reg; + } + ++static void sm6_parser_emit_dx_get_sample_count(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SAMPLE_INFO); ++ ins->flags = VKD3DSI_SAMPLE_INFO_UINT; ++ ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ vsir_register_init(&src_param->reg, VKD3DSPR_RASTERIZER, VKD3D_DATA_FLOAT, 0); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; ++ src_param_init(src_param); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++ ins->dst->reg.data_type = VKD3D_DATA_FLOAT; ++} ++ ++static void sm6_parser_emit_dx_get_sample_pos(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_params; ++ const struct sm6_value *resource = NULL; ++ ++ if (op == DX_TEX2DMS_GET_SAMPLE_POS) ++ { ++ resource = operands[0]; ++ if (!sm6_value_validate_is_texture_2dms_handle(resource, op, sm6)) ++ return; ++ } ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SAMPLE_POS); ++ ++ if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) ++ return; ++ if (op == DX_TEX2DMS_GET_SAMPLE_POS) ++ { ++ src_param_init_vector_from_reg(&src_params[0], &resource->u.handle.reg); ++ src_param_init_from_value(&src_params[1], operands[1]); ++ } ++ else ++ { ++ src_param_init_vector(&src_params[0], 2); ++ vsir_register_init(&src_params[0].reg, VKD3DSPR_RASTERIZER, VKD3D_DATA_FLOAT, 0); ++ src_params[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ src_param_init_from_value(&src_params[1], operands[0]); ++ } ++ ++ instruction_dst_param_init_ssa_vector(ins, 2, sm6); ++} ++ + static unsigned 
int sm6_value_get_texel_offset(const struct sm6_value *value) + { + return sm6_value_is_undef(value) ? 0 : sm6_value_get_constant_uint(value); +@@ -4521,6 +5418,21 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ + instruction_dst_param_init_ssa_vector(ins, component_count, sm6); + } + ++static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ src_param_init_from_value(src_param, operands[0]); ++ ++ if (instruction_dst_param_init_ssa_scalar(ins, sm6)) ++ ins->dst->modifiers = VKD3DSPDM_SATURATE; ++} ++ + static void sm6_parser_emit_dx_sincos(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -4572,7 +5484,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr + row_index = sm6_value_get_constant_uint(operands[0]); + column_index = sm6_value_get_constant_uint(operands[2]); + +- signature = &sm6->p.shader_desc.output_signature; ++ signature = &sm6->p.program.output_signature; + if (row_index >= signature->element_count) + { + WARN("Invalid row index %u.\n", row_index); +@@ -4608,10 +5520,78 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr + if (e->register_count > 1) + register_index_address_init(&dst_param->reg.idx[0], operands[1], sm6); + ++ if (e->register_index == UINT_MAX) ++ { ++ sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_OUTPUT, dst_param->reg.type, ++ dst_param->reg.data_type, vsir_write_mask_component_count(e->mask)); ++ } ++ + if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) + 
src_param_init_from_value(src_param, value); + } + ++static void sm6_parser_emit_dx_texture_gather(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_register coord, offset; ++ const struct sm6_value *resource, *sampler; ++ struct vkd3d_shader_src_param *src_params; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int swizzle; ++ bool extended_offset; ++ ++ resource = operands[0]; ++ sampler = operands[1]; ++ if (!sm6_value_validate_is_texture_handle(resource, op, sm6) ++ || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) ++ { ++ return; ++ } ++ ++ if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], VKD3D_VEC4_SIZE, NULL, state, &coord)) ++ return; ++ ++ if ((extended_offset = !sm6_value_vector_is_constant_or_undef(&operands[6], 2)) ++ && !sm6_parser_emit_coordinate_construct(sm6, &operands[6], 2, NULL, state, &offset)) ++ { ++ return; ++ } ++ ++ ins = state->ins; ++ if (op == DX_TEXTURE_GATHER) ++ { ++ instruction_init_with_resource(ins, extended_offset ? VKD3DSIH_GATHER4_PO : VKD3DSIH_GATHER4, resource, sm6); ++ if (!(src_params = instruction_src_params_alloc(ins, 3 + extended_offset, sm6))) ++ return; ++ } ++ else ++ { ++ instruction_init_with_resource(ins, extended_offset ? 
VKD3DSIH_GATHER4_PO_C : VKD3DSIH_GATHER4_C, resource, sm6); ++ if (!(src_params = instruction_src_params_alloc(ins, 4 + extended_offset, sm6))) ++ return; ++ src_param_init_from_value(&src_params[3 + extended_offset], operands[9]); ++ } ++ ++ src_param_init_vector_from_reg(&src_params[0], &coord); ++ if (extended_offset) ++ src_param_init_vector_from_reg(&src_params[1], &offset); ++ else ++ instruction_set_texel_offset(ins, &operands[6], sm6); ++ src_param_init_vector_from_reg(&src_params[1 + extended_offset], &resource->u.handle.reg); ++ src_param_init_vector_from_reg(&src_params[2 + extended_offset], &sampler->u.handle.reg); ++ /* Swizzle stored in the sampler parameter is the scalar component index to be gathered. */ ++ swizzle = sm6_value_get_constant_uint(operands[8]); ++ if (swizzle >= VKD3D_VEC4_SIZE) ++ { ++ WARN("Invalid swizzle %#x.\n", swizzle); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Swizzle %#x for a texture gather operation is invalid.", swizzle); ++ } ++ src_params[2 + extended_offset].swizzle = swizzle; ++ ++ instruction_dst_param_init_ssa_vector(ins, VKD3D_VEC4_SIZE, sm6); ++} ++ + static void sm6_parser_emit_dx_texture_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -4741,9 +5721,12 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_ATAN ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_ATOMIC_BINOP ] = {"o", "HciiiR", sm6_parser_emit_dx_atomic_binop}, + [DX_ATOMIC_CMP_XCHG ] = {"o", "HiiiRR", sm6_parser_emit_dx_atomic_binop}, ++ [DX_BARRIER ] = {"v", "c", sm6_parser_emit_dx_barrier}, + [DX_BFREV ] = {"m", "R", sm6_parser_emit_dx_unary}, + [DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load}, + [DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store}, ++ [DX_BUFFER_UPDATE_COUNTER ] = {"i", "H8", sm6_parser_emit_dx_buffer_update_counter}, ++ [DX_CALCULATE_LOD ] = {"f", 
"HHfffb", sm6_parser_emit_dx_calculate_lod}, + [DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, + [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, + [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, +@@ -4752,18 +5735,28 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_DERIV_COARSEY ] = {"e", "R", sm6_parser_emit_dx_unary}, + [DX_DERIV_FINEX ] = {"e", "R", sm6_parser_emit_dx_unary}, + [DX_DERIV_FINEY ] = {"e", "R", sm6_parser_emit_dx_unary}, ++ [DX_DISCARD ] = {"v", "1", sm6_parser_emit_dx_discard}, ++ [DX_DOT2 ] = {"g", "RRRR", sm6_parser_emit_dx_dot}, ++ [DX_DOT3 ] = {"g", "RRRRRR", sm6_parser_emit_dx_dot}, ++ [DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot}, + [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, ++ [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, + [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, + [DX_FIRST_BIT_LO ] = {"i", "m", sm6_parser_emit_dx_unary}, + [DX_FIRST_BIT_SHI ] = {"i", "m", sm6_parser_emit_dx_unary}, ++ [DX_FLATTENED_THREAD_ID_IN_GROUP ] = {"i", "", sm6_parser_emit_dx_compute_builtin}, ++ [DX_FMA ] = {"g", "RRR", sm6_parser_emit_dx_ma}, ++ [DX_FMAD ] = {"g", "RRR", sm6_parser_emit_dx_ma}, + [DX_FMAX ] = {"g", "RR", sm6_parser_emit_dx_binary}, + [DX_FMIN ] = {"g", "RR", sm6_parser_emit_dx_binary}, + [DX_FRC ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_GET_DIMENSIONS ] = {"D", "Hi", sm6_parser_emit_dx_get_dimensions}, ++ [DX_GROUP_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, + [DX_IBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, + [DX_HCOS ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_HSIN ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_HTAN ] = {"g", "R", sm6_parser_emit_dx_unary}, ++ [DX_IMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, + [DX_IMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, + [DX_IMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, + [DX_ISFINITE ] = {"1", "g", sm6_parser_emit_dx_unary}, +@@ -4773,6 +5766,8 @@ static const struct 
sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_LEGACY_F32TOF16 ] = {"i", "f", sm6_parser_emit_dx_unary}, + [DX_LOAD_INPUT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, + [DX_LOG ] = {"g", "R", sm6_parser_emit_dx_unary}, ++ [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, ++ [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, + [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, + [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, + [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, +@@ -4780,20 +5775,29 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_ROUND_PI ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_ROUND_Z ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_RSQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, ++ [DX_RT_GET_SAMPLE_COUNT ] = {"i", "", sm6_parser_emit_dx_get_sample_count}, ++ [DX_RT_GET_SAMPLE_POS ] = {"o", "i", sm6_parser_emit_dx_get_sample_pos}, + [DX_SAMPLE ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, + [DX_SAMPLE_B ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample}, + [DX_SAMPLE_C ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample}, + [DX_SAMPLE_C_LZ ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, + [DX_SAMPLE_GRAD ] = {"o", "HHffffiiifffffff", sm6_parser_emit_dx_sample}, + [DX_SAMPLE_LOD ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, ++ [DX_SATURATE ] = {"g", "R", sm6_parser_emit_dx_saturate}, + [DX_SIN ] = {"g", "R", sm6_parser_emit_dx_sincos}, + [DX_SPLIT_DOUBLE ] = {"S", "d", sm6_parser_emit_dx_split_double}, + [DX_SQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_STORE_OUTPUT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, + [DX_TAN ] = {"g", "R", sm6_parser_emit_dx_unary}, ++ [DX_TEX2DMS_GET_SAMPLE_POS ] = {"o", "Hi", sm6_parser_emit_dx_get_sample_pos}, ++ [DX_TEXTURE_GATHER ] = {"o", "HHffffiic", sm6_parser_emit_dx_texture_gather}, ++ [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", 
sm6_parser_emit_dx_texture_gather}, + [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, + [DX_TEXTURE_STORE ] = {"v", "Hiiiooooc", sm6_parser_emit_dx_texture_store}, ++ [DX_THREAD_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, ++ [DX_THREAD_ID_IN_GROUP ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, + [DX_UBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, ++ [DX_UMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, + [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, + [DX_UMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, + }; +@@ -5055,7 +6059,10 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ + break; + case CAST_ZEXT: + case CAST_SEXT: +- /* nop or min precision. TODO: native 16-bit */ ++ /* nop or min precision. TODO: native 16-bit. ++ * Extension instructions could be emitted for min precision, but in Windows ++ * the AMD RX 580 simply drops such instructions, which makes sense as no ++ * assumptions should be made about any behaviour which depends on bit width. 
*/ + if (to->u.width == from->u.width || (to->u.width == 32 && from->u.width == 16)) + { + op = VKD3DSIH_NOP; +@@ -5187,8 +6194,8 @@ static const struct sm6_cmp_info *sm6_map_cmp2_op(uint64_t code) + [FCMP_OLT] = {VKD3DSIH_LTO}, + [FCMP_OLE] = {VKD3DSIH_GEO, true}, + [FCMP_ONE] = {VKD3DSIH_NEO}, +- [FCMP_ORD] = {VKD3DSIH_INVALID}, +- [FCMP_UNO] = {VKD3DSIH_INVALID}, ++ [FCMP_ORD] = {VKD3DSIH_ORD}, ++ [FCMP_UNO] = {VKD3DSIH_UNO}, + [FCMP_UEQ] = {VKD3DSIH_EQU}, + [FCMP_UGT] = {VKD3DSIH_LTU, true}, + [FCMP_UGE] = {VKD3DSIH_GEU}, +@@ -5304,6 +6311,87 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor + instruction_dst_param_init_ssa_scalar(ins, sm6); + } + ++static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_record *record, ++ struct vkd3d_shader_instruction *ins, struct sm6_value *dst) ++{ ++ uint64_t success_ordering, failure_ordering; ++ struct vkd3d_shader_dst_param *dst_params; ++ struct vkd3d_shader_src_param *src_params; ++ const struct sm6_value *ptr, *cmp, *new; ++ const struct sm6_type *type; ++ unsigned int i = 0; ++ bool is_volatile; ++ uint64_t code; ++ ++ if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) ++ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) ++ return; ++ ++ if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) ++ { ++ WARN("Register is not groupshared.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "The destination register for a cmpxchg instruction is not groupshared memory."); ++ return; ++ } ++ ++ if (!(dst->type = sm6_type_get_cmpxchg_result_struct(sm6))) ++ { ++ WARN("Failed to find result struct.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, ++ "Module does not define a result struct type for a cmpxchg instruction."); ++ return; ++ } ++ ++ type = ptr->type->u.pointer.type; ++ cmp = sm6_parser_get_value_by_ref(sm6, record, type, &i); ++ new = sm6_parser_get_value_by_ref(sm6, record, type, 
&i); ++ if (!cmp || !new) ++ return; ++ ++ if (!sm6_value_validate_is_i32(cmp, sm6) ++ || !sm6_value_validate_is_i32(new, sm6) ++ || !dxil_record_validate_operand_count(record, i + 3, i + 5, sm6)) ++ { ++ return; ++ } ++ ++ is_volatile = record->operands[i++]; ++ success_ordering = record->operands[i++]; ++ ++ if ((code = record->operands[i++]) != 1) ++ FIXME("Ignoring synchronisation scope %"PRIu64".\n", code); ++ ++ failure_ordering = (record->operand_count > i) ? record->operands[i++] : success_ordering; ++ ++ /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ ++ if (success_ordering != ORDERING_SEQCST) ++ FIXME("Unhandled success ordering %"PRIu64".\n", success_ordering); ++ if (success_ordering != failure_ordering) ++ FIXME("Unhandled failure ordering %"PRIu64".\n", failure_ordering); ++ ++ if (record->operand_count > i && record->operands[i]) ++ FIXME("Ignoring weak cmpxchg.\n"); ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_IMM_ATOMIC_CMP_EXCH); ++ ins->flags = is_volatile ? 
VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; ++ ++ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) ++ return; ++ src_param_make_constant_uint(&src_params[0], 0); ++ src_param_init_from_value(&src_params[1], cmp); ++ src_param_init_from_value(&src_params[2], new); ++ ++ if (!(dst_params = instruction_dst_params_alloc(ins, 2, sm6))) ++ return; ++ register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); ++ dst_param_init(&dst_params[0]); ++ dst_params[1].reg = ptr->u.reg; ++ dst_param_init(&dst_params[1]); ++ ++ dst->u.reg = dst_params[0].reg; ++} ++ + static void sm6_parser_emit_extractval(struct sm6_parser *sm6, const struct dxil_record *record, + struct vkd3d_shader_instruction *ins, struct sm6_value *dst) + { +@@ -5459,6 +6547,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record + register_index_address_init(®->idx[1], elem_value, sm6); + reg->idx[1].is_in_bounds = is_in_bounds; + reg->idx_count = 2; ++ dst->structure_stride = src->structure_stride; + + ins->handler_idx = VKD3DSIH_NOP; + } +@@ -5467,8 +6556,8 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor + struct vkd3d_shader_instruction *ins, struct sm6_value *dst) + { + const struct sm6_type *elem_type = NULL, *pointee_type; +- struct vkd3d_shader_src_param *src_param; +- unsigned int alignment, i = 0; ++ unsigned int alignment, operand_count, i = 0; ++ struct vkd3d_shader_src_param *src_params; + const struct sm6_value *ptr; + uint64_t alignment_code; + +@@ -5505,12 +6594,34 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor + if (record->operands[i]) + WARN("Ignoring volatile modifier.\n"); + +- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); ++ if (ptr->structure_stride) ++ { ++ assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); + +- if (!(src_param = instruction_src_params_alloc(ins, 
1, sm6))) +- return; +- src_param_init_from_value(&src_param[0], ptr); +- src_param->reg.alignment = alignment; ++ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) ++ return; ++ if (ptr->u.reg.idx[1].rel_addr) ++ src_params[0] = *ptr->u.reg.idx[1].rel_addr; ++ else ++ src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); ++ /* Struct offset is always zero as there is no struct, just an array. */ ++ src_param_make_constant_uint(&src_params[1], 0); ++ src_param_init_from_value(&src_params[2], ptr); ++ src_params[2].reg.alignment = alignment; ++ } ++ else ++ { ++ operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); ++ vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV); ++ ++ if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) ++ return; ++ if (operand_count > 1) ++ src_param_make_constant_uint(&src_params[0], 0); ++ src_param_init_from_value(&src_params[operand_count - 1], ptr); ++ src_params[operand_count - 1].reg.alignment = alignment; ++ } + + instruction_dst_param_init_ssa_scalar(ins, sm6); + } +@@ -5628,11 +6739,11 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record + static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, + struct vkd3d_shader_instruction *ins, struct sm6_value *dst) + { +- struct vkd3d_shader_src_param *src_param; ++ unsigned int i = 0, alignment, operand_count; ++ struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_dst_param *dst_param; + const struct sm6_type *pointee_type; + const struct sm6_value *ptr, *src; +- unsigned int i = 0, alignment; + uint64_t alignment_code; + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) +@@ -5665,16 +6776,40 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco + if (record->operands[i]) + WARN("Ignoring volatile modifier.\n"); + +- vsir_instruction_init(ins, 
&sm6->p.location, VKD3DSIH_MOV); ++ if (ptr->structure_stride) ++ { ++ assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED); + +- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) +- return; +- src_param_init_from_value(&src_param[0], src); ++ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) ++ return; ++ if (ptr->u.reg.idx[1].rel_addr) ++ src_params[0] = *ptr->u.reg.idx[1].rel_addr; ++ else ++ src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); ++ /* Struct offset is always zero as there is no struct, just an array. */ ++ src_param_make_constant_uint(&src_params[1], 0); ++ src_param_init_from_value(&src_params[2], src); ++ } ++ else ++ { ++ operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); ++ vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV); ++ ++ if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) ++ return; ++ if (operand_count > 1) ++ src_param_make_constant_uint(&src_params[0], 0); ++ src_param_init_from_value(&src_params[operand_count - 1], src); ++ } + + dst_param = instruction_dst_params_alloc(ins, 1, sm6); + dst_param_init(dst_param); + dst_param->reg = ptr->u.reg; + dst_param->reg.alignment = alignment; ++ /* Groupshared stores contain the address in the src params. 
*/ ++ if (dst_param->reg.type != VKD3DSPR_IDXTEMP) ++ dst_param->reg.idx_count = 1; + } + + static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_record *record, +@@ -5855,6 +6990,25 @@ static bool sm6_metadata_get_uint64_value(const struct sm6_parser *sm6, + return true; + } + ++static bool sm6_metadata_get_float_value(const struct sm6_parser *sm6, ++ const struct sm6_metadata_value *m, float *f) ++{ ++ const struct sm6_value *value; ++ ++ if (!m || m->type != VKD3D_METADATA_VALUE) ++ return false; ++ ++ value = m->u.value; ++ if (!sm6_value_is_constant(value)) ++ return false; ++ if (!sm6_type_is_floating_point(value->type)) ++ return false; ++ ++ *f = register_get_float_value(&value->u.reg); ++ ++ return true; ++} ++ + static void sm6_parser_metadata_attachment_block_init(struct sm6_parser *sm6, const struct dxil_block *target_block, + const struct dxil_block *block) + { +@@ -6215,6 +7369,13 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + case FUNC_CODE_INST_ALLOCA: + sm6_parser_emit_alloca(sm6, record, ins, dst); + break; ++ case FUNC_CODE_INST_ATOMICRMW: ++ { ++ struct function_emission_state state = {code_block, ins}; ++ sm6_parser_emit_atomicrmw(sm6, record, &state, dst); ++ sm6->p.program.temp_count = max(sm6->p.program.temp_count, state.temp_idx); ++ break; ++ } + case FUNC_CODE_INST_BINOP: + sm6_parser_emit_binop(sm6, record, ins, dst); + break; +@@ -6235,6 +7396,9 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + case FUNC_CODE_INST_CMP2: + sm6_parser_emit_cmp2(sm6, record, ins, dst); + break; ++ case FUNC_CODE_INST_CMPXCHG: ++ sm6_parser_emit_cmpxchg(sm6, record, ins, dst); ++ break; + case FUNC_CODE_INST_EXTRACTVAL: + sm6_parser_emit_extractval(sm6, record, ins, dst); + break; +@@ -6803,11 +7967,45 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = + [SEMANTIC_KIND_POSITION] = VKD3D_SHADER_SV_POSITION, + [SEMANTIC_KIND_ISFRONTFACE] = 
VKD3D_SHADER_SV_IS_FRONT_FACE, + [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, ++ [SEMANTIC_KIND_DEPTH] = VKD3D_SHADER_SV_DEPTH, ++ [SEMANTIC_KIND_DEPTHLESSEQUAL] = VKD3D_SHADER_SV_DEPTH_LESS_EQUAL, ++ [SEMANTIC_KIND_DEPTHGREATEREQUAL] = VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL, + }; + +-static enum vkd3d_shader_sysval_semantic sysval_semantic_from_dxil_semantic_kind(enum dxil_semantic_kind kind) ++static enum vkd3d_shader_sysval_semantic sysval_semantic_from_dxil_semantic_kind(enum dxil_semantic_kind kind, ++ enum vkd3d_tessellator_domain domain) + { +- if (kind < ARRAY_SIZE(sysval_semantic_table)) ++ if (kind == SEMANTIC_KIND_TESSFACTOR) ++ { ++ switch (domain) ++ { ++ case VKD3D_TESSELLATOR_DOMAIN_LINE: ++ return VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; ++ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: ++ return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; ++ case VKD3D_TESSELLATOR_DOMAIN_QUAD: ++ return VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE; ++ default: ++ /* Error is handled during parsing. */ ++ return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; ++ } ++ } ++ else if (kind == SEMANTIC_KIND_INSIDETESSFACTOR) ++ { ++ switch (domain) ++ { ++ case VKD3D_TESSELLATOR_DOMAIN_LINE: ++ return VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; ++ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: ++ return VKD3D_SHADER_SV_TESS_FACTOR_TRIINT; ++ case VKD3D_TESSELLATOR_DOMAIN_QUAD: ++ return VKD3D_SHADER_SV_TESS_FACTOR_QUADINT; ++ default: ++ /* Error is handled during parsing. 
*/ ++ return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; ++ } ++ } ++ else if (kind < ARRAY_SIZE(sysval_semantic_table)) + { + return sysval_semantic_table[kind]; + } +@@ -7563,12 +8761,13 @@ static void signature_element_read_additional_element_values(struct signature_el + } + + static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, +- struct shader_signature *s) ++ struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain) + { + unsigned int i, j, column_count, operand_count, index; + const struct sm6_metadata_node *node, *element_node; + struct signature_element *elements, *e; + unsigned int values[10]; ++ bool is_register; + + if (!m) + return VKD3D_OK; +@@ -7656,7 +8855,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const + e->min_precision = minimum_precision_from_dxil_component_type(values[2]); + + j = values[3]; +- e->sysval_semantic = sysval_semantic_from_dxil_semantic_kind(j); ++ e->sysval_semantic = sysval_semantic_from_dxil_semantic_kind(j, tessellator_domain); + if (j != SEMANTIC_KIND_ARBITRARY && j != SEMANTIC_KIND_TARGET && e->sysval_semantic == VKD3D_SHADER_SV_NONE) + { + WARN("Unhandled semantic kind %u.\n", j); +@@ -7677,7 +8876,18 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const + column_count = values[7]; + e->register_index = values[8]; + e->target_location = e->register_index; +- if (e->register_index > MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index) ++ ++ if ((is_register = e->register_index == UINT_MAX)) ++ { ++ if (register_type_from_dxil_semantic_kind(e->sysval_semantic) == VKD3DSPR_INVALID) ++ { ++ WARN("Unhandled I/O register semantic kind %u.\n", j); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, ++ "DXIL semantic kind %u is unhandled for an I/O register.", j); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ } ++ else if (e->register_index > 
MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index) + { + WARN("Invalid row start %u with row count %u.\n", e->register_index, e->register_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, +@@ -7685,8 +8895,9 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const + e->register_index, e->register_count); + return VKD3D_ERROR_INVALID_SHADER; + } ++ + index = values[9]; +- if (index >= VKD3D_VEC4_SIZE || column_count > VKD3D_VEC4_SIZE - index) ++ if (index != UINT8_MAX && (index >= VKD3D_VEC4_SIZE || column_count > VKD3D_VEC4_SIZE - index)) + { + WARN("Invalid column start %u with count %u.\n", index, column_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, +@@ -7696,10 +8907,13 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const + + e->mask = vkd3d_write_mask_from_component_count(column_count); + e->used_mask = e->mask; +- e->mask <<= index; +- + signature_element_read_additional_element_values(e, element_node, sm6); +- e->used_mask <<= index; ++ ++ if (index != UINT8_MAX) ++ { ++ e->mask <<= index; ++ e->used_mask <<= index; ++ } + + m = element_node->operands[4]; + if (!sm6_metadata_value_is_node(m)) +@@ -7739,7 +8953,8 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const + return VKD3D_OK; + } + +-static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m) ++static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m, ++ enum vkd3d_tessellator_domain tessellator_domain) + { + enum vkd3d_result ret; + +@@ -7752,19 +8967,19 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons + } + + if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], +- &sm6->p.shader_desc.input_signature)) < 0) ++ 
&sm6->p.program.input_signature, tessellator_domain)) < 0) + { + return ret; + } + if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], +- &sm6->p.shader_desc.output_signature)) < 0) ++ &sm6->p.program.output_signature, tessellator_domain)) < 0) + { + return ret; + } + /* TODO: patch constant signature in operand 2. */ + +- sm6_parser_init_input_signature(sm6, &sm6->p.shader_desc.input_signature); +- sm6_parser_init_output_signature(sm6, &sm6->p.shader_desc.output_signature); ++ sm6_parser_init_input_signature(sm6, &sm6->p.program.input_signature); ++ sm6_parser_init_output_signature(sm6, &sm6->p.program.output_signature); + + return VKD3D_OK; + } +@@ -7850,10 +9065,216 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co + return VKD3D_OK; + } + ++static void sm6_parser_emit_dcl_count(struct sm6_parser *sm6, enum vkd3d_shader_opcode handler_idx, unsigned int count) ++{ ++ struct vkd3d_shader_instruction *ins; ++ ++ ins = sm6_parser_add_instruction(sm6, handler_idx); ++ ins->declaration.count = count; ++} ++ ++static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, ++ enum vkd3d_tessellator_domain tessellator_domain) ++{ ++ struct vkd3d_shader_instruction *ins; ++ ++ if (tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID || tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) ++ { ++ WARN("Unhandled domain %u.\n", tessellator_domain); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Domain shader tessellator domain %u is unhandled.", tessellator_domain); ++ } ++ ++ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); ++ ins->declaration.tessellator_domain = tessellator_domain; ++} ++ ++static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, ++ const char *type) ++{ ++ if (!count || count > 32) ++ { ++ WARN("%s control point count %u invalid.\n", type, count); ++ 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "%s control point count %u is invalid.", type, count); ++ } ++} ++ ++static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, ++ enum vkd3d_shader_tessellator_partitioning tessellator_partitioning) ++{ ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!tessellator_partitioning || tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) ++ { ++ WARN("Unhandled partitioning %u.\n", tessellator_partitioning); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Hull shader tessellator partitioning %u is unhandled.", tessellator_partitioning); ++ } ++ ++ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING); ++ ins->declaration.tessellator_partitioning = tessellator_partitioning; ++} ++ ++static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *sm6, ++ enum vkd3d_shader_tessellator_output_primitive primitive) ++{ ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!primitive || primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) ++ { ++ WARN("Unhandled output primitive %u.\n", primitive); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Hull shader tessellator output primitive %u is unhandled.", primitive); ++ } ++ ++ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE); ++ ins->declaration.tessellator_output_primitive = primitive; ++} ++ ++static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, struct sm6_metadata_value *m) ++{ ++ struct vkd3d_shader_instruction *ins; ++ float max_tessellation_factor; ++ ++ if (!sm6_metadata_get_float_value(sm6, m, &max_tessellation_factor)) ++ { ++ WARN("Max tess factor property is not a float value.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Hull shader max tessellation factor property 
operand is not a float."); ++ return; ++ } ++ ++ /* Exclude non-finite values. */ ++ if (!(max_tessellation_factor >= 1.0f && max_tessellation_factor <= 64.0f)) ++ { ++ WARN("Invalid max tess factor %f.\n", max_tessellation_factor); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Hull shader max tessellation factor %f is invalid.", max_tessellation_factor); ++ } ++ ++ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_HS_MAX_TESSFACTOR); ++ ins->declaration.max_tessellation_factor = max_tessellation_factor; ++} ++ ++static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_parser *sm6, ++ const struct sm6_metadata_value *m) ++{ ++ const struct sm6_metadata_node *node; ++ unsigned int operands[2] = {0}; ++ unsigned int i; ++ ++ if (!m || !sm6_metadata_value_is_node(m)) ++ { ++ WARN("Missing or invalid DS properties.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Domain shader properties node is missing or invalid."); ++ return 0; ++ } ++ ++ node = m->u.node; ++ if (node->operand_count < ARRAY_SIZE(operands)) ++ { ++ WARN("Invalid operand count %u.\n", node->operand_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, ++ "Domain shader properties operand count %u is invalid.", node->operand_count); ++ return 0; ++ } ++ if (node->operand_count > ARRAY_SIZE(operands)) ++ { ++ WARN("Ignoring %zu extra operands.\n", node->operand_count - ARRAY_SIZE(operands)); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Ignoring %zu extra operands for domain shader properties.", ++ node->operand_count - ARRAY_SIZE(operands)); ++ } ++ ++ for (i = 0; i < node->operand_count; ++i) ++ { ++ if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) ++ { ++ WARN("DS property at index %u is not a uint value.\n", i); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, 
++ "Domain shader properties operand at index %u is not an integer.", i); ++ } ++ } ++ ++ sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); ++ sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); ++ sm6->p.program.input_control_point_count = operands[1]; ++ ++ return operands[0]; ++} ++ ++static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_parser *sm6, ++ const struct sm6_metadata_value *m) ++{ ++ const struct sm6_metadata_node *node; ++ unsigned int operands[6] = {0}; ++ unsigned int i; ++ ++ if (!m || !sm6_metadata_value_is_node(m)) ++ { ++ WARN("Missing or invalid HS properties.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Hull shader properties node is missing or invalid."); ++ return 0; ++ } ++ ++ node = m->u.node; ++ if (node->operand_count < 7) ++ { ++ WARN("Invalid operand count %u.\n", node->operand_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, ++ "Hull shader properties operand count %u is invalid.", node->operand_count); ++ return 0; ++ } ++ if (node->operand_count > 7) ++ { ++ WARN("Ignoring %u extra operands.\n", node->operand_count - 7); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Ignoring %u extra operands for hull shader properties.", node->operand_count - 7); ++ } ++ ++ m = node->operands[0]; ++ if (!sm6_metadata_value_is_value(m) || !sm6_value_is_function_dcl(m->u.value)) ++ { ++ WARN("Patch constant function node is not a function value.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Hull shader patch constant function node is not a function value."); ++ } ++ else ++ { ++ sm6->patch_constant_function = m->u.value->u.function.name; ++ } ++ ++ for (i = 1; i < min(node->operand_count, ARRAY_SIZE(operands)); ++i) ++ { ++ if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) ++ { ++ WARN("HS 
property at index %u is not a uint value.\n", i); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Hull shader properties operand at index %u is not an integer.", i); ++ } ++ } ++ ++ sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); ++ sm6->p.program.input_control_point_count = operands[1]; ++ sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); ++ sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); ++ sm6->p.program.output_control_point_count = operands[2]; ++ sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); ++ sm6_parser_emit_dcl_tessellator_partitioning(sm6, operands[4]); ++ sm6_parser_emit_dcl_tessellator_output_primitive(sm6, operands[5]); ++ sm6_parser_emit_dcl_max_tessellation_factor(sm6, node->operands[6]); ++ ++ return operands[3]; ++} ++ + static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) + { + const struct sm6_metadata_value *m = sm6_parser_find_named_metadata(sm6, "dx.entryPoints"); + const struct sm6_metadata_node *node, *entry_node = m ? 
m->u.node : NULL; ++ enum vkd3d_tessellator_domain tessellator_domain = 0; + unsigned int i, operand_count, tag; + const struct sm6_value *value; + enum vkd3d_result ret; +@@ -7892,12 +9313,6 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) + "Entry point function name %s does not match the name in metadata.", sm6->entry_point); + } + +- if (entry_node->operand_count >= 3 && (m = entry_node->operands[2]) +- && (ret = sm6_parser_signatures_init(sm6, m)) < 0) +- { +- return ret; +- } +- + if (entry_node->operand_count >= 5 && (m = entry_node->operands[4])) + { + if (!sm6_metadata_value_is_node(m)) +@@ -7932,6 +9347,12 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) + case SHADER_PROPERTIES_FLAGS: + sm6_parser_emit_global_flags(sm6, node->operands[i + 1]); + break; ++ case SHADER_PROPERTIES_DOMAIN: ++ tessellator_domain = sm6_parser_ds_properties_init(sm6, node->operands[i + 1]); ++ break; ++ case SHADER_PROPERTIES_HULL: ++ tessellator_domain = sm6_parser_hs_properties_init(sm6, node->operands[i + 1]); ++ break; + case SHADER_PROPERTIES_COMPUTE: + if ((ret = sm6_parser_emit_thread_group(sm6, node->operands[i + 1])) < 0) + return ret; +@@ -7945,6 +9366,12 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) + } + } + ++ if (entry_node->operand_count >= 3 && (m = entry_node->operands[2]) ++ && (ret = sm6_parser_signatures_init(sm6, m, tessellator_domain)) < 0) ++ { ++ return ret; ++ } ++ + return VKD3D_OK; + } + +@@ -8062,7 +9489,6 @@ static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) + sm6_parser_metadata_cleanup(sm6); + vkd3d_free(sm6->descriptors); + vkd3d_free(sm6->values); +- free_shader_desc(&parser->shader_desc); + vkd3d_free(sm6); + } + +@@ -8080,15 +9506,16 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 + return NULL; + } + +-static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, 
size_t byte_code_size, +- const char *source_name, struct vkd3d_shader_message_context *message_context) ++static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *source_name, ++ struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) + { +- const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; +- const struct shader_signature *input_signature = &sm6->p.shader_desc.input_signature; ++ const struct shader_signature *output_signature = &sm6->p.program.output_signature; ++ const struct shader_signature *input_signature = &sm6->p.program.input_signature; ++ size_t count, length, function_count, byte_code_size = dxbc_desc->byte_code_size; + const struct vkd3d_shader_location location = {.source_name = source_name}; + uint32_t version_token, dxil_version, token_count, magic; ++ const uint32_t *byte_code = dxbc_desc->byte_code; + unsigned int chunk_offset, chunk_size; +- size_t count, length, function_count; + enum bitcode_block_abbreviation abbr; + struct vkd3d_shader_version version; + struct dxil_block *block; +@@ -8181,6 +9608,11 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + sm6->ptr = &sm6->start[1]; + sm6->bitpos = 2; + ++ sm6->p.program.input_signature = dxbc_desc->input_signature; ++ sm6->p.program.output_signature = dxbc_desc->output_signature; ++ sm6->p.program.patch_constant_signature = dxbc_desc->patch_constant_signature; ++ memset(dxbc_desc, 0, sizeof(*dxbc_desc)); ++ + block = &sm6->root_block; + if ((ret = dxil_block_init(block, NULL, sm6)) < 0) + { +@@ -8351,7 +9783,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) + { +- struct vkd3d_shader_desc *shader_desc; ++ struct dxbc_shader_desc dxbc_desc = {0}; + uint32_t 
*byte_code = NULL; + struct sm6_parser *sm6; + int ret; +@@ -8364,35 +9796,37 @@ int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compi + return VKD3D_ERROR_OUT_OF_MEMORY; + } + +- shader_desc = &sm6->p.shader_desc; +- shader_desc->is_dxil = true; ++ dxbc_desc.is_dxil = true; + if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, +- shader_desc)) < 0) ++ &dxbc_desc)) < 0) + { + WARN("Failed to extract shader, vkd3d result %d.\n", ret); + vkd3d_free(sm6); + return ret; + } + +- sm6->p.shader_desc = *shader_desc; +- shader_desc = &sm6->p.shader_desc; +- +- if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) ++ if (((uintptr_t)dxbc_desc.byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) + { + /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC + * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ +- if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) +- ERR("Failed to allocate aligned chunk. Unaligned access will occur.\n"); +- else +- memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); ++ if (!(byte_code = vkd3d_malloc(align(dxbc_desc.byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) ++ { ++ ERR("Failed to allocate aligned chunk.\n"); ++ free_dxbc_shader_desc(&dxbc_desc); ++ vkd3d_free(sm6); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ memcpy(byte_code, dxbc_desc.byte_code, dxbc_desc.byte_code_size); ++ dxbc_desc.byte_code = byte_code; + } + +- ret = sm6_parser_init(sm6, byte_code ? 
byte_code : shader_desc->byte_code, shader_desc->byte_code_size, +- compile_info->source_name, message_context); ++ ret = sm6_parser_init(sm6, compile_info->source_name, message_context, &dxbc_desc); ++ free_dxbc_shader_desc(&dxbc_desc); + vkd3d_free(byte_code); + + if (!sm6->p.failed && ret >= 0) +- ret = vsir_validate(&sm6->p); ++ ret = vkd3d_shader_parser_validate(&sm6->p); + + if (sm6->p.failed && ret >= 0) + ret = VKD3D_ERROR_INVALID_SHADER; +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index bc70d5220fd..98443797543 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -61,9 +61,9 @@ struct fx_write_context; + struct fx_write_context_ops + { + uint32_t (*write_string)(const char *string, struct fx_write_context *fx); +- uint32_t (*write_type)(const struct hlsl_type *type, struct fx_write_context *fx); + void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); + void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); ++ bool are_child_effects_supported; + }; + + struct fx_write_context +@@ -84,8 +84,14 @@ struct fx_write_context + uint32_t buffer_count; + uint32_t numeric_variable_count; + uint32_t object_variable_count; ++ uint32_t shared_object_count; ++ uint32_t shader_variable_count; ++ uint32_t parameter_count; + int status; + ++ bool child_effect; ++ bool include_empty_buffers; ++ + const struct fx_write_context_ops *ops; + }; + +@@ -97,6 +103,11 @@ static void set_status(struct fx_write_context *fx, int status) + fx->status = status; + } + ++static bool has_annotations(const struct hlsl_ir_var *var) ++{ ++ return var->annotations && !list_empty(&var->annotations->vars); ++} ++ + static uint32_t write_string(const char *string, struct fx_write_context *fx) + { + return fx->ops->write_string(string, fx); +@@ -104,15 +115,22 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) + + static void write_pass(struct 
hlsl_ir_var *var, struct fx_write_context *fx) + { ++ if (var->state_block_count) ++ hlsl_fixme(fx->ctx, &var->loc, "Write state block assignments."); ++ + fx->ops->write_pass(var, fx); + } + ++static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); ++ + static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) + { + struct type_entry *type_entry; + unsigned int elements_count; + const char *name; + ++ assert(fx->ctx->profile->major_version >= 4); ++ + if (type->class == HLSL_CLASS_ARRAY) + { + name = hlsl_get_multiarray_element_type(type)->name; +@@ -138,7 +156,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context + if (!(type_entry = hlsl_alloc(fx->ctx, sizeof(*type_entry)))) + return 0; + +- type_entry->offset = fx->ops->write_type(type, fx); ++ type_entry->offset = write_fx_4_type(type, fx); + type_entry->name = name; + type_entry->elements_count = elements_count; + +@@ -151,6 +169,7 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co + struct fx_write_context *fx) + { + unsigned int version = ctx->profile->major_version; ++ struct hlsl_block block; + + memset(fx, 0, sizeof(*fx)); + +@@ -174,12 +193,19 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co + + rb_init(&fx->strings, string_storage_compare); + list_init(&fx->types); ++ ++ fx->child_effect = fx->ops->are_child_effects_supported && ctx->child_effect; ++ fx->include_empty_buffers = version == 4 && ctx->include_empty_buffers; ++ ++ hlsl_block_init(&block); ++ hlsl_prepend_global_uniform_copy(fx->ctx, &block); ++ hlsl_block_cleanup(&block); ++ hlsl_calculate_buffer_offsets(fx->ctx); + } + + static int fx_write_context_cleanup(struct fx_write_context *fx) + { + struct type_entry *type, *next_type; +- int status = fx->status; + + rb_destroy(&fx->strings, string_storage_destroy, NULL); + +@@ -189,7 +215,7 @@ static int fx_write_context_cleanup(struct 
fx_write_context *fx) + vkd3d_free(type); + } + +- return status; ++ return fx->ctx->result; + } + + static bool technique_matches_version(const struct hlsl_ir_var *var, const struct fx_write_context *fx) +@@ -285,6 +311,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, + [HLSL_TYPE_UINT ] = 3, + [HLSL_TYPE_BOOL ] = 4, + }; ++ struct hlsl_ctx *ctx = fx->ctx; + uint32_t value = 0; + + switch (type->class) +@@ -295,8 +322,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, + value |= numeric_type_class[type->class]; + break; + default: +- FIXME("Unexpected type class %u.\n", type->class); +- set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); ++ hlsl_fixme(ctx, &ctx->location, "Not implemented for type class %u.", type->class); + return 0; + } + +@@ -309,8 +335,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, + value |= (numeric_base_type[type->base_type] << NUMERIC_BASE_TYPE_SHIFT); + break; + default: +- FIXME("Unexpected base type %u.\n", type->base_type); +- set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); ++ hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->base_type); + return 0; + } + +@@ -322,19 +347,14 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, + return value; + } + +-static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) ++static const char * get_fx_4_type_name(const struct hlsl_type *type) + { +- struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; +- uint32_t name_offset, offset, size, stride, numeric_desc; +- uint32_t elements_count = 0; +- const char *name; +- static const uint32_t variable_type[] = ++ static const char * const object_type_names[] = + { +- [HLSL_CLASS_SCALAR] = 1, +- [HLSL_CLASS_VECTOR] = 1, +- [HLSL_CLASS_MATRIX] = 1, +- [HLSL_CLASS_OBJECT] = 2, +- [HLSL_CLASS_STRUCT] = 3, ++ [HLSL_TYPE_PIXELSHADER] = "PixelShader", ++ 
[HLSL_TYPE_VERTEXSHADER] = "VertexShader", ++ [HLSL_TYPE_RENDERTARGETVIEW] = "RenderTargetView", ++ [HLSL_TYPE_DEPTHSTENCILVIEW] = "DepthStencilView", + }; + static const char * const texture_type_names[] = + { +@@ -360,6 +380,32 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer", + }; + ++ if (type->class == HLSL_CLASS_TEXTURE) ++ return texture_type_names[type->sampler_dim]; ++ ++ if (type->class == HLSL_CLASS_UAV) ++ return uav_type_names[type->sampler_dim]; ++ ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_PIXELSHADER: ++ case HLSL_TYPE_VERTEXSHADER: ++ case HLSL_TYPE_RENDERTARGETVIEW: ++ case HLSL_TYPE_DEPTHSTENCILVIEW: ++ return object_type_names[type->base_type]; ++ default: ++ return type->name; ++ } ++} ++ ++static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) ++{ ++ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; ++ uint32_t name_offset, offset, size, stride, numeric_desc; ++ uint32_t elements_count = 0; ++ const char *name; ++ struct hlsl_ctx *ctx = fx->ctx; ++ + /* Resolve arrays to element type and number of elements. 
*/ + if (type->class == HLSL_CLASS_ARRAY) + { +@@ -367,12 +413,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + type = hlsl_get_multiarray_element_type(type); + } + +- if (type->base_type == HLSL_TYPE_TEXTURE) +- name = texture_type_names[type->sampler_dim]; +- else if (type->base_type == HLSL_TYPE_UAV) +- name = uav_type_names[type->sampler_dim]; +- else +- name = type->name; ++ name = get_fx_4_type_name(type); + + name_offset = write_string(name, fx); + offset = put_u32_unaligned(buffer, name_offset); +@@ -382,11 +423,25 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: ++ put_u32_unaligned(buffer, 1); ++ break; ++ + case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_UAV: ++ put_u32_unaligned(buffer, 2); ++ break; ++ + case HLSL_CLASS_STRUCT: +- put_u32_unaligned(buffer, variable_type[type->class]); ++ put_u32_unaligned(buffer, 3); + break; +- default: ++ ++ case HLSL_CLASS_ARRAY: ++ vkd3d_unreachable(); ++ ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_VOID: + FIXME("Writing type class %u is not implemented.\n", type->class); + set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + return 0; +@@ -422,13 +477,8 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + put_u32_unaligned(buffer, field_type_offset); + } + } +- else if (type->class == HLSL_CLASS_OBJECT) ++ else if (type->class == HLSL_CLASS_TEXTURE) + { +- static const uint32_t object_type[] = +- { +- [HLSL_TYPE_RENDERTARGETVIEW] = 19, +- [HLSL_TYPE_DEPTHSTENCILVIEW] = 20, +- }; + static const uint32_t texture_type[] = + { + [HLSL_SAMPLER_DIM_GENERIC] = 9, +@@ -442,6 +492,11 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + [HLSL_SAMPLER_DIM_CUBE] = 17, + [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, + }; ++ ++ put_u32_unaligned(buffer, 
texture_type[type->sampler_dim]); ++ } ++ else if (type->class == HLSL_CLASS_UAV) ++ { + static const uint32_t uav_type[] = + { + [HLSL_SAMPLER_DIM_1D] = 31, +@@ -453,21 +508,28 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, + }; + ++ put_u32_unaligned(buffer, uav_type[type->sampler_dim]); ++ } ++ else if (type->class == HLSL_CLASS_OBJECT) ++ { ++ static const uint32_t object_type[] = ++ { ++ [HLSL_TYPE_PIXELSHADER] = 5, ++ [HLSL_TYPE_VERTEXSHADER] = 6, ++ [HLSL_TYPE_RENDERTARGETVIEW] = 19, ++ [HLSL_TYPE_DEPTHSTENCILVIEW] = 20, ++ }; ++ + switch (type->base_type) + { + case HLSL_TYPE_DEPTHSTENCILVIEW: ++ case HLSL_TYPE_PIXELSHADER: + case HLSL_TYPE_RENDERTARGETVIEW: ++ case HLSL_TYPE_VERTEXSHADER: + put_u32_unaligned(buffer, object_type[type->base_type]); + break; +- case HLSL_TYPE_TEXTURE: +- put_u32_unaligned(buffer, texture_type[type->sampler_dim]); +- break; +- case HLSL_TYPE_UAV: +- put_u32_unaligned(buffer, uav_type[type->sampler_dim]); +- break; + default: +- FIXME("Object type %u is not supported.\n", type->base_type); +- set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); ++ hlsl_fixme(ctx, &ctx->location, "Object type %u is not supported.", type->base_type); + return 0; + } + } +@@ -565,11 +627,71 @@ static uint32_t write_fx_2_string(const char *string, struct fx_write_context *f + { + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + const char *s = string ? 
string : ""; ++ static const char tail[3]; + uint32_t size, offset; + + size = strlen(s) + 1; + offset = put_u32(buffer, size); + bytecode_put_bytes(buffer, s, size); ++ size %= 4; ++ if (size) ++ bytecode_put_bytes_unaligned(buffer, tail, 4 - size); ++ return offset; ++} ++ ++static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic, ++ struct fx_write_context *fx) ++{ ++ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; ++ uint32_t semantic_offset, offset, elements_count = 0, name_offset; ++ size_t i; ++ ++ /* Resolve arrays to element type and number of elements. */ ++ if (type->class == HLSL_CLASS_ARRAY) ++ { ++ elements_count = hlsl_get_multiarray_size(type); ++ type = hlsl_get_multiarray_element_type(type); ++ } ++ ++ name_offset = write_string(name, fx); ++ semantic_offset = write_string(semantic->name, fx); ++ ++ offset = put_u32(buffer, hlsl_sm1_base_type(type)); ++ put_u32(buffer, hlsl_sm1_class(type)); ++ put_u32(buffer, name_offset); ++ put_u32(buffer, semantic_offset); ++ put_u32(buffer, elements_count); ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_VECTOR: ++ put_u32(buffer, type->dimx); ++ put_u32(buffer, type->dimy); ++ break; ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_MATRIX: ++ put_u32(buffer, type->dimy); ++ put_u32(buffer, type->dimx); ++ break; ++ case HLSL_CLASS_STRUCT: ++ put_u32(buffer, type->e.record.field_count); ++ break; ++ default: ++ ; ++ } ++ ++ if (type->class == HLSL_CLASS_STRUCT) ++ { ++ for (i = 0; i < type->e.record.field_count; ++i) ++ { ++ const struct hlsl_struct_field *field = &type->e.record.fields[i]; ++ ++ /* Validated in check_invalid_object_fields(). 
*/ ++ assert(hlsl_is_numeric_type(field->type)); ++ write_fx_2_parameter(field->type, field->name, &field->semantic, fx); ++ } ++ } ++ + return offset; + } + +@@ -595,6 +717,163 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex + set_u32(buffer, count_offset, count); + } + ++static uint32_t get_fx_2_type_size(const struct hlsl_type *type) ++{ ++ uint32_t size = 0, elements_count; ++ size_t i; ++ ++ if (type->class == HLSL_CLASS_ARRAY) ++ { ++ elements_count = hlsl_get_multiarray_size(type); ++ type = hlsl_get_multiarray_element_type(type); ++ return get_fx_2_type_size(type) * elements_count; ++ } ++ else if (type->class == HLSL_CLASS_STRUCT) ++ { ++ for (i = 0; i < type->e.record.field_count; ++i) ++ { ++ const struct hlsl_struct_field *field = &type->e.record.fields[i]; ++ size += get_fx_2_type_size(field->type); ++ } ++ ++ return size; ++ } ++ ++ return type->dimx * type->dimy * sizeof(float); ++} ++ ++static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct fx_write_context *fx) ++{ ++ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; ++ const struct hlsl_type *type = var->data_type; ++ uint32_t offset, size, elements_count = 1; ++ ++ size = get_fx_2_type_size(type); ++ ++ if (type->class == HLSL_CLASS_ARRAY) ++ { ++ elements_count = hlsl_get_multiarray_size(type); ++ type = hlsl_get_multiarray_element_type(type); ++ } ++ ++ /* Note that struct fields must all be numeric; ++ * this was validated in check_invalid_object_fields(). */ ++ switch (type->class) ++ { ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ case HLSL_CLASS_STRUCT: ++ /* FIXME: write actual initial value */ ++ offset = put_u32(buffer, 0); ++ ++ for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) ++ put_u32(buffer, 0); ++ break; ++ ++ default: ++ /* Objects are given sequential ids. 
*/ ++ offset = put_u32(buffer, fx->object_variable_count++); ++ for (uint32_t i = 1; i < elements_count; ++i) ++ put_u32(buffer, fx->object_variable_count++); ++ break; ++ } ++ ++ return offset; ++} ++ ++static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type *type, ++ const struct vkd3d_shader_location *loc) ++{ ++ switch (type->class) ++ { ++ case HLSL_CLASS_STRUCT: ++ /* Note that the fields must all be numeric; this was validated in ++ * check_invalid_object_fields(). */ ++ return true; ++ ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ return true; ++ ++ case HLSL_CLASS_ARRAY: ++ return is_type_supported_fx_2(ctx, type->e.array.type, loc); ++ ++ case HLSL_CLASS_TEXTURE: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ case HLSL_SAMPLER_DIM_2D: ++ case HLSL_SAMPLER_DIM_3D: ++ case HLSL_SAMPLER_DIM_CUBE: ++ case HLSL_SAMPLER_DIM_GENERIC: ++ return true; ++ default: ++ return false; ++ } ++ break; ++ ++ case HLSL_CLASS_OBJECT: ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_PIXELSHADER: ++ case HLSL_TYPE_VERTEXSHADER: ++ hlsl_fixme(ctx, loc, "Write fx 2.0 parameter object type %#x.", type->base_type); ++ return false; ++ ++ default: ++ return false; ++ } ++ ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: ++ hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); ++ return false; ++ ++ case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VOID: ++ return false; ++ } ++ ++ vkd3d_unreachable(); ++} ++ ++static void write_fx_2_parameters(struct fx_write_context *fx) ++{ ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ uint32_t desc_offset, value_offset, flags; ++ struct hlsl_ctx *ctx = fx->ctx; ++ struct hlsl_ir_var *var; ++ enum fx_2_parameter_flags ++ { ++ IS_SHARED = 0x1, ++ }; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc)) ++ continue; ++ ++ desc_offset = 
write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); ++ value_offset = write_fx_2_initial_value(var, fx); ++ ++ flags = 0; ++ if (var->storage_modifiers & HLSL_STORAGE_SHARED) ++ flags |= IS_SHARED; ++ ++ put_u32(buffer, desc_offset); /* Parameter description */ ++ put_u32(buffer, value_offset); /* Value */ ++ put_u32(buffer, flags); /* Flags */ ++ ++ put_u32(buffer, 0); /* Annotations count */ ++ if (has_annotations(var)) ++ hlsl_fixme(ctx, &ctx->location, "Writing annotations for parameters is not implemented."); ++ ++ ++fx->parameter_count; ++ } ++} ++ + static const struct fx_write_context_ops fx_2_ops = + { + .write_string = write_fx_2_string, +@@ -604,12 +883,13 @@ static const struct fx_write_context_ops fx_2_ops = + + static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + { ++ uint32_t offset, size, technique_count, parameter_count, object_count; + struct vkd3d_bytecode_buffer buffer = { 0 }; + struct vkd3d_bytecode_buffer *structured; +- uint32_t offset, size, technique_count; + struct fx_write_context fx; + + fx_write_context_init(ctx, &fx_2_ops, &fx); ++ fx.object_variable_count = 1; + structured = &fx.structured; + + /* First entry is always zeroed and skipped. */ +@@ -618,12 +898,14 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&buffer, 0xfeff0901); /* Version. 
*/ + offset = put_u32(&buffer, 0); + +- put_u32(structured, 0); /* Parameter count */ ++ parameter_count = put_u32(structured, 0); /* Parameter count */ + technique_count = put_u32(structured, 0); + put_u32(structured, 0); /* Unknown */ +- put_u32(structured, 0); /* Object count */ ++ object_count = put_u32(structured, 0); + +- /* TODO: parameters */ ++ write_fx_2_parameters(&fx); ++ set_u32(structured, parameter_count, fx.parameter_count); ++ set_u32(structured, object_count, fx.object_variable_count); + + write_techniques(ctx->globals, &fx); + set_u32(structured, technique_count, fx.technique_count); +@@ -643,24 +925,27 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + vkd3d_free(fx.unstructured.data); + vkd3d_free(fx.structured.data); + +- if (!fx.status) ++ if (!fx.technique_count) ++ hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE, "No techniques found."); ++ ++ if (fx.status < 0) ++ ctx->result = fx.status; ++ ++ if (!ctx->result) + { + out->code = buffer.data; + out->size = buffer.size; + } + +- if (fx.status < 0) +- ctx->result = fx.status; +- + return fx_write_context_cleanup(&fx); + } + + static const struct fx_write_context_ops fx_4_ops = + { + .write_string = write_fx_4_string, +- .write_type = write_fx_4_type, + .write_technique = write_fx_4_technique, + .write_pass = write_fx_4_pass, ++ .are_child_effects_supported = true, + }; + + static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) +@@ -672,6 +957,7 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write + { + HAS_EXPLICIT_BIND_POINT = 0x4, + }; ++ struct hlsl_ctx *ctx = fx->ctx; + + /* Explicit bind point. 
*/ + if (var->reg_reservation.reg_type) +@@ -690,14 +976,18 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write + put_u32(buffer, flags); /* Flags */ + + put_u32(buffer, 0); /* Annotations count */ +- /* FIXME: write annotations */ ++ if (has_annotations(var)) ++ hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); + } + + static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) + { ++ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); ++ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t semantic_offset, bind_point = ~0u; +- uint32_t name_offset, type_offset; ++ uint32_t name_offset, type_offset, i; ++ struct hlsl_ctx *ctx = fx->ctx; + + if (var->reg_reservation.reg_type) + bind_point = var->reg_reservation.reg_index; +@@ -712,8 +1002,47 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ + semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ + put_u32(buffer, bind_point); /* Explicit bind point */ + ++ if (fx->child_effect && var->storage_modifiers & HLSL_STORAGE_SHARED) ++ { ++ ++fx->shared_object_count; ++ return; ++ } ++ ++ /* Initializer */ ++ switch (type->class) ++ { ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_UAV: ++ break; ++ ++ case HLSL_CLASS_OBJECT: ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_RENDERTARGETVIEW: ++ break; ++ case HLSL_TYPE_PIXELSHADER: ++ case HLSL_TYPE_VERTEXSHADER: ++ /* FIXME: write shader blobs, once parser support works. 
*/ ++ for (i = 0; i < elements_count; ++i) ++ put_u32(buffer, 0); ++ ++fx->shader_variable_count; ++ break; ++ default: ++ hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", ++ type->base_type); ++ } ++ break; ++ ++ default: ++ hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", ++ type->base_type); ++ } ++ + put_u32(buffer, 0); /* Annotations count */ +- /* FIXME: write annotations */ ++ if (has_annotations(var)) ++ hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); ++ ++ ++fx->object_variable_count; + } + + static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx) +@@ -734,7 +1063,8 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx + bind_point = b->reservation.reg_index; + if (b->type == HLSL_BUFFER_TEXTURE) + flags |= IS_TBUFFER; +- /* FIXME: set 'single' flag for fx_5_0 */ ++ if (ctx->profile->major_version == 5 && b->modifiers & HLSL_MODIFIER_SINGLE) ++ flags |= IS_SINGLE; + + name_offset = write_string(b->name, fx); + +@@ -745,7 +1075,8 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx + put_u32(buffer, bind_point); /* Bind point */ + + put_u32(buffer, 0); /* Annotations count */ +- /* FIXME: write annotations */ ++ if (b->annotations) ++ hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); + + count = 0; + size = 0; +@@ -768,16 +1099,12 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx + static void write_buffers(struct fx_write_context *fx) + { + struct hlsl_buffer *buffer; +- struct hlsl_block block; +- +- hlsl_block_init(&block); +- hlsl_prepend_global_uniform_copy(fx->ctx, &block); +- hlsl_block_init(&block); +- hlsl_calculate_buffer_offsets(fx->ctx); + + LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) + { +- if (!buffer->size) ++ if (!buffer->size 
&& !fx->include_empty_buffers) ++ continue; ++ if (!strcmp(buffer->name, "$Params")) + continue; + + write_fx_4_buffer(buffer, fx); +@@ -789,38 +1116,46 @@ static bool is_object_variable(const struct hlsl_ir_var *var) + { + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + +- if (type->class != HLSL_CLASS_OBJECT) +- return false; +- +- switch (type->base_type) ++ switch (type->class) + { +- case HLSL_TYPE_SAMPLER: +- case HLSL_TYPE_TEXTURE: +- case HLSL_TYPE_UAV: +- case HLSL_TYPE_PIXELSHADER: +- case HLSL_TYPE_VERTEXSHADER: +- case HLSL_TYPE_RENDERTARGETVIEW: ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_UAV: + return true; ++ ++ case HLSL_CLASS_OBJECT: ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_PIXELSHADER: ++ case HLSL_TYPE_VERTEXSHADER: ++ case HLSL_TYPE_RENDERTARGETVIEW: ++ return true; ++ default: ++ return false; ++ } ++ + default: + return false; + } + } + +-static void write_objects(struct fx_write_context *fx) ++static void write_objects(struct fx_write_context *fx, bool shared) + { + struct hlsl_ir_var *var; +- uint32_t count = 0; ++ ++ if (shared && !fx->child_effect) ++ return; + + LIST_FOR_EACH_ENTRY(var, &fx->ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!is_object_variable(var)) + continue; + ++ if (fx->child_effect && (shared != !!(var->storage_modifiers & HLSL_STORAGE_SHARED))) ++ continue; ++ + write_fx_4_object_variable(var, fx); +- ++count; + } +- +- fx->object_variable_count += count; + } + + static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -834,9 +1169,9 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&fx.unstructured, 0); /* Empty string placeholder. 
*/ + + write_buffers(&fx); +- write_objects(&fx); ++ write_objects(&fx, false); + /* TODO: shared buffers */ +- /* TODO: shared objects */ ++ write_objects(&fx, true); + + write_techniques(ctx->globals, &fx); + +@@ -846,7 +1181,7 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&buffer, fx.object_variable_count); /* Object variable count. */ + put_u32(&buffer, 0); /* Pool buffer count. */ + put_u32(&buffer, 0); /* Pool variable count. */ +- put_u32(&buffer, 0); /* Pool object count. */ ++ put_u32(&buffer, fx.shared_object_count); /* Shared object count. */ + put_u32(&buffer, fx.technique_count); + size_offset = put_u32(&buffer, 0); /* Unstructured size. */ + put_u32(&buffer, 0); /* String count. */ +@@ -857,7 +1192,7 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&buffer, 0); /* Sampler state count. */ + put_u32(&buffer, 0); /* Rendertarget view count. */ + put_u32(&buffer, 0); /* Depth stencil view count. */ +- put_u32(&buffer, 0); /* Shader count. */ ++ put_u32(&buffer, fx.shader_variable_count); /* Shader count. */ + put_u32(&buffer, 0); /* Inline shader count. */ + + set_u32(&buffer, size_offset, fx.unstructured.size); +@@ -870,15 +1205,15 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + + set_status(&fx, buffer.status); + +- if (!fx.status) ++ if (fx.status < 0) ++ ctx->result = fx.status; ++ ++ if (!ctx->result) + { + out->code = buffer.data; + out->size = buffer.size; + } + +- if (fx.status < 0) +- ctx->result = fx.status; +- + return fx_write_context_cleanup(&fx); + } + +@@ -893,7 +1228,7 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&fx.unstructured, 0); /* Empty string placeholder. 
*/ + + write_buffers(&fx); +- write_objects(&fx); ++ write_objects(&fx, false); + /* TODO: interface variables */ + + write_groups(&fx); +@@ -915,7 +1250,7 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&buffer, 0); /* Sampler state count. */ + put_u32(&buffer, 0); /* Rendertarget view count. */ + put_u32(&buffer, 0); /* Depth stencil view count. */ +- put_u32(&buffer, 0); /* Shader count. */ ++ put_u32(&buffer, fx.shader_variable_count); /* Shader count. */ + put_u32(&buffer, 0); /* Inline shader count. */ + put_u32(&buffer, fx.group_count); /* Group count. */ + put_u32(&buffer, 0); /* UAV count. */ +@@ -933,15 +1268,15 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + + set_status(&fx, buffer.status); + +- if (!fx.status) ++ if (fx.status < 0) ++ ctx->result = fx.status; ++ ++ if (!ctx->result) + { + out->code = buffer.data; + out->size = buffer.size; + } + +- if (fx.status < 0) +- ctx->result = fx.status; +- + return fx_write_context_cleanup(&fx); + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index bdd03c1e72a..3e8dd2c486b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -20,29 +20,14 @@ + + struct vkd3d_glsl_generator + { +- struct vkd3d_shader_version version; ++ struct vsir_program *program; + struct vkd3d_string_buffer buffer; + struct vkd3d_shader_location location; + struct vkd3d_shader_message_context *message_context; ++ unsigned int indent; + bool failed; + }; + +-struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, +- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) +-{ +- struct vkd3d_glsl_generator *generator; +- +- if (!(generator = vkd3d_malloc(sizeof(*generator)))) +- return NULL; +- +- memset(generator, 0, sizeof(*generator)); +- generator->version = *version; +- 
vkd3d_string_buffer_init(&generator->buffer); +- generator->location = *location; +- generator->message_context = message_context; +- return generator; +-} +- + static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( + struct vkd3d_glsl_generator *generator, + enum vkd3d_shader_error error, const char *fmt, ...) +@@ -55,10 +40,23 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( + generator->failed = true; + } + ++static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) ++{ ++ vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); ++} ++ ++static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ shader_glsl_print_indent(&gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->handler_idx); ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); ++} ++ + static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, + const struct vkd3d_shader_instruction *ins) + { +- const struct vkd3d_shader_version *version = &generator->version; ++ const struct vkd3d_shader_version *version = &generator->program->shader_version; + + /* + * TODO: Implement in_subroutine +@@ -66,6 +64,7 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, + */ + if (version->major >= 4) + { ++ shader_glsl_print_indent(&generator->buffer, generator->indent); + vkd3d_string_buffer_printf(&generator->buffer, "return;\n"); + } + } +@@ -73,6 +72,8 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, + static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, + const struct vkd3d_shader_instruction *instruction) + { ++ generator->location = instruction->location; ++ + switch (instruction->handler_idx) + { + case VKD3DSIH_DCL_INPUT: +@@ -83,38 +84,44 @@ static void 
vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator + shader_glsl_ret(generator, instruction); + break; + default: +- vkd3d_glsl_compiler_error(generator, +- VKD3D_SHADER_ERROR_GLSL_INTERNAL, +- "Unhandled instruction %#x", instruction->handler_idx); ++ shader_glsl_unhandled(generator, instruction); + break; + } + } + +-int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, +- struct vsir_program *program, struct vkd3d_shader_code *out) ++static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) + { ++ const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; ++ struct vkd3d_string_buffer *buffer = &gen->buffer; + unsigned int i; + void *code; + +- vkd3d_string_buffer_printf(&generator->buffer, "#version 440\n\n"); +- vkd3d_string_buffer_printf(&generator->buffer, "void main()\n{\n"); ++ ERR("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); ++ ++ vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); ++ ++ vkd3d_string_buffer_printf(buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); + +- generator->location.column = 0; +- for (i = 0; i < program->instructions.count; ++i) ++ vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); ++ ++ ++gen->indent; ++ for (i = 0; i < instructions->count; ++i) + { +- generator->location.line = i + 1; +- vkd3d_glsl_handle_instruction(generator, &program->instructions.elements[i]); ++ vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]); + } + +- if (generator->failed) +- return VKD3D_ERROR_INVALID_SHADER; ++ vkd3d_string_buffer_printf(buffer, "}\n"); + +- vkd3d_string_buffer_printf(&generator->buffer, "}\n"); ++ if (TRACE_ON()) ++ vkd3d_string_buffer_trace(buffer); ++ ++ if (gen->failed) ++ return VKD3D_ERROR_INVALID_SHADER; + +- if ((code = vkd3d_malloc(generator->buffer.buffer_size))) ++ if ((code = vkd3d_malloc(buffer->buffer_size))) + { +- memcpy(code, generator->buffer.buffer, generator->buffer.content_size); +- out->size = generator->buffer.content_size; ++ memcpy(code, buffer->buffer, buffer->content_size); ++ out->size = buffer->content_size; + out->code = code; + } + else return VKD3D_ERROR_OUT_OF_MEMORY; +@@ -122,8 +129,33 @@ int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, + return VKD3D_OK; + } + +-void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator) ++static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) ++{ ++ vkd3d_string_buffer_cleanup(&gen->buffer); ++} ++ ++static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, ++ struct vsir_program *program, struct vkd3d_shader_message_context *message_context) + { +- vkd3d_string_buffer_cleanup(&generator->buffer); +- vkd3d_free(generator); ++ memset(gen, 0, sizeof(*gen)); ++ gen->program = program; ++ vkd3d_string_buffer_init(&gen->buffer); ++ gen->message_context = message_context; ++} ++ ++int glsl_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, 
struct vkd3d_shader_code *out, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ struct vkd3d_glsl_generator generator; ++ int ret; ++ ++ if ((ret = vsir_program_normalise(program, config_flags, compile_info, message_context)) < 0) ++ return ret; ++ ++ vkd3d_glsl_generator_init(&generator, program, message_context); ++ ret = vkd3d_glsl_generator_generate(&generator, out); ++ vkd3d_glsl_generator_cleanup(&generator); ++ ++ return ret; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 538f083df9c..5dd80ff1c3f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -134,14 +134,39 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) + return hlsl_get_var(scope->upper, name); + } + +-void hlsl_free_var(struct hlsl_ir_var *decl) ++static void free_state_block_entry(struct hlsl_state_block_entry *entry) ++{ ++ vkd3d_free(entry->name); ++ vkd3d_free(entry->args); ++ hlsl_block_cleanup(entry->instrs); ++ vkd3d_free(entry->instrs); ++ vkd3d_free(entry); ++} ++ ++void hlsl_free_state_block(struct hlsl_state_block *state_block) + { + unsigned int k; + ++ assert(state_block); ++ for (k = 0; k < state_block->count; ++k) ++ free_state_block_entry(state_block->entries[k]); ++ vkd3d_free(state_block->entries); ++ vkd3d_free(state_block); ++} ++ ++void hlsl_free_var(struct hlsl_ir_var *decl) ++{ ++ unsigned int k, i; ++ + vkd3d_free((void *)decl->name); + hlsl_cleanup_semantic(&decl->semantic); + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + vkd3d_free((void *)decl->objects_usage[k]); ++ ++ for (i = 0; i < decl->state_block_count; ++i) ++ hlsl_free_state_block(decl->state_blocks[i]); ++ vkd3d_free(decl->state_blocks); ++ + vkd3d_free(decl); + } + +@@ -201,50 +226,46 @@ unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) + + bool hlsl_type_is_resource(const struct hlsl_type *type) + { +- if (type->class == HLSL_CLASS_ARRAY) +- return 
hlsl_type_is_resource(type->e.array.type); +- +- if (type->class == HLSL_CLASS_OBJECT) ++ switch (type->class) + { +- switch (type->base_type) +- { +- case HLSL_TYPE_TEXTURE: +- case HLSL_TYPE_SAMPLER: +- case HLSL_TYPE_UAV: +- return true; +- default: +- return false; +- } ++ case HLSL_CLASS_ARRAY: ++ return hlsl_type_is_resource(type->e.array.type); ++ ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_UAV: ++ return true; ++ ++ default: ++ return false; + } +- return false; + } + + /* Only intended to be used for derefs (after copies have been lowered to components or vectors) or + * resources, since for both their data types span across a single regset. */ + static enum hlsl_regset type_get_regset(const struct hlsl_type *type) + { +- if (hlsl_is_numeric_type(type)) +- return HLSL_REGSET_NUMERIC; ++ switch (type->class) ++ { ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ return HLSL_REGSET_NUMERIC; + +- if (type->class == HLSL_CLASS_ARRAY) +- return type_get_regset(type->e.array.type); ++ case HLSL_CLASS_ARRAY: ++ return type_get_regset(type->e.array.type); + +- if (type->class == HLSL_CLASS_OBJECT) +- { +- switch (type->base_type) +- { +- case HLSL_TYPE_TEXTURE: +- return HLSL_REGSET_TEXTURES; ++ case HLSL_CLASS_SAMPLER: ++ return HLSL_REGSET_SAMPLERS; + +- case HLSL_TYPE_SAMPLER: +- return HLSL_REGSET_SAMPLERS; ++ case HLSL_CLASS_TEXTURE: ++ return HLSL_REGSET_TEXTURES; + +- case HLSL_TYPE_UAV: +- return HLSL_REGSET_UAVS; ++ case HLSL_CLASS_UAV: ++ return HLSL_REGSET_UAVS; + +- default: +- vkd3d_unreachable(); +- } ++ default: ++ break; + } + + vkd3d_unreachable(); +@@ -330,16 +351,22 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + break; + } + +- case HLSL_CLASS_OBJECT: +- { +- if (hlsl_type_is_resource(type)) +- { +- enum hlsl_regset regset = type_get_regset(type); ++ case HLSL_CLASS_SAMPLER: ++ type->reg_size[HLSL_REGSET_SAMPLERS] = 1; ++ break; + +- 
type->reg_size[regset] = 1; +- } ++ case HLSL_CLASS_TEXTURE: ++ type->reg_size[HLSL_REGSET_TEXTURES] = 1; ++ break; ++ ++ case HLSL_CLASS_UAV: ++ type->reg_size[HLSL_REGSET_UAVS] = 1; ++ break; ++ ++ case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_VOID: + break; +- } + } + } + +@@ -352,6 +379,25 @@ unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, + return type->reg_size[regset]; + } + ++static struct hlsl_type *hlsl_new_simple_type(struct hlsl_ctx *ctx, const char *name, enum hlsl_type_class class) ++{ ++ struct hlsl_type *type; ++ ++ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) ++ return NULL; ++ if (!(type->name = hlsl_strdup(ctx, name))) ++ { ++ vkd3d_free(type); ++ return NULL; ++ } ++ type->class = class; ++ hlsl_type_calculate_reg_size(ctx, type); ++ ++ list_add_tail(&ctx->types, &type->entry); ++ ++ return type; ++} ++ + static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, enum hlsl_type_class type_class, + enum hlsl_base_type base_type, unsigned dimx, unsigned dimy) + { +@@ -377,7 +423,26 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e + + static bool type_is_single_component(const struct hlsl_type *type) + { +- return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_OBJECT; ++ switch (type->class) ++ { ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_UAV: ++ return true; ++ ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ case HLSL_CLASS_STRUCT: ++ case HLSL_CLASS_ARRAY: ++ return false; ++ ++ case HLSL_CLASS_VOID: ++ break; ++ } ++ vkd3d_unreachable(); + } + + /* Given a type and a component index, this function moves one step through the path required to +@@ -497,10 +562,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + break; + + case HLSL_CLASS_OBJECT: ++ case 
HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_UAV: + assert(idx == 0); + break; + +- default: ++ case HLSL_CLASS_VOID: + vkd3d_unreachable(); + } + type = next_type; +@@ -727,7 +796,6 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + type->class = HLSL_CLASS_STRUCT; +- type->base_type = HLSL_TYPE_VOID; + type->name = name; + type->dimy = 1; + type->e.record.fields = fields; +@@ -746,8 +814,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; +- type->class = HLSL_CLASS_OBJECT; +- type->base_type = HLSL_TYPE_TEXTURE; ++ type->class = HLSL_CLASS_TEXTURE; + type->dimx = 4; + type->dimy = 1; + type->sampler_dim = dim; +@@ -765,8 +832,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; +- type->class = HLSL_CLASS_OBJECT; +- type->base_type = HLSL_TYPE_UAV; ++ type->class = HLSL_CLASS_UAV; + type->dimx = format->dimx; + type->dimy = 1; + type->sampler_dim = dim; +@@ -784,7 +850,10 @@ static const char * get_case_insensitive_typename(const char *name) + "dword", + "float", + "matrix", ++ "pixelshader", ++ "texture", + "vector", ++ "vertexshader", + }; + unsigned int i; + +@@ -866,11 +935,17 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) + return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; + + case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_UAV: + return 1; + +- default: +- vkd3d_unreachable(); ++ case HLSL_CLASS_VOID: ++ break; + } ++ ++ vkd3d_unreachable(); + } + + bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2) +@@ -882,16 +957,15 @@ bool hlsl_types_are_equal(const 
struct hlsl_type *t1, const struct hlsl_type *t2 + return false; + if (t1->base_type != t2->base_type) + return false; +- if (t1->base_type == HLSL_TYPE_SAMPLER || t1->base_type == HLSL_TYPE_TEXTURE +- || t1->base_type == HLSL_TYPE_UAV) ++ if (t1->class == HLSL_CLASS_SAMPLER || t1->class == HLSL_CLASS_TEXTURE || t1->class == HLSL_CLASS_UAV) + { + if (t1->sampler_dim != t2->sampler_dim) + return false; +- if ((t1->base_type == HLSL_TYPE_TEXTURE || t1->base_type == HLSL_TYPE_UAV) ++ if ((t1->class == HLSL_CLASS_TEXTURE || t1->class == HLSL_CLASS_UAV) + && t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC + && !hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format)) + return false; +- if (t1->base_type == HLSL_TYPE_UAV && t1->e.resource.rasteriser_ordered != t2->e.resource.rasteriser_ordered) ++ if (t1->class == HLSL_CLASS_UAV && t1->e.resource.rasteriser_ordered != t2->e.resource.rasteriser_ordered) + return false; + } + if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) +@@ -1008,14 +1082,16 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, + break; + } + ++ case HLSL_CLASS_UAV: ++ type->e.resource.rasteriser_ordered = old->e.resource.rasteriser_ordered; ++ /* fall through */ ++ case HLSL_CLASS_TEXTURE: ++ type->e.resource.format = old->e.resource.format; ++ break; ++ + case HLSL_CLASS_OBJECT: + if (type->base_type == HLSL_TYPE_TECHNIQUE) + type->e.version = old->e.version; +- if (old->base_type == HLSL_TYPE_TEXTURE || old->base_type == HLSL_TYPE_UAV) +- { +- type->e.resource.format = old->e.resource.format; +- type->e.resource.rasteriser_ordered = old->e.resource.rasteriser_ordered; +- } + break; + + default: +@@ -1346,6 +1422,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp + return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); + } + ++struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, ++ struct hlsl_ir_node *arg1, struct hlsl_ir_node 
*arg2, struct hlsl_ir_node *arg3) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; ++ ++ assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); ++ assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); ++ return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); ++} ++ + struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, + struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) + { +@@ -1548,6 +1634,27 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned + return &swizzle->node; + } + ++struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, ++ struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_stateblock_constant *constant; ++ struct hlsl_type *type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT); ++ ++ if (!(constant = hlsl_alloc(ctx, sizeof(*constant)))) ++ return NULL; ++ ++ init_node(&constant->node, HLSL_IR_STATEBLOCK_CONSTANT, type, loc); ++ ++ if (!(constant->name = hlsl_alloc(ctx, strlen(name) + 1))) ++ { ++ vkd3d_free(constant); ++ return NULL; ++ } ++ strcpy(constant->name, name); ++ ++ return &constant->node; ++} ++ + bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) + { + struct hlsl_type *type = index->val.node->data_type; +@@ -1557,7 +1664,9 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) + + bool hlsl_index_is_resource_access(struct hlsl_ir_index *index) + { +- return index->val.node->data_type->class == HLSL_CLASS_OBJECT; ++ const struct hlsl_type *type = index->val.node->data_type; ++ ++ return type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV; + } + + bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index) +@@ -1578,7 +1687,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v + if (!(index = hlsl_alloc(ctx, sizeof(*index)))) + return NULL; + +- if 
(type->class == HLSL_CLASS_OBJECT) ++ if (type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV) + type = type->e.resource.format; + else if (type->class == HLSL_CLASS_MATRIX) + type = hlsl_get_vector_type(ctx, type->base_type, type->dimx); +@@ -1868,6 +1977,12 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr + return dst; + } + ++static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, ++ struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant) ++{ ++ return hlsl_new_stateblock_constant(ctx, constant->name, &constant->node.loc); ++} ++ + void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c) + { + hlsl_block_cleanup(&c->body); +@@ -1963,6 +2078,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + + case HLSL_IR_SWIZZLE: + return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); ++ ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); + } + + vkd3d_unreachable(); +@@ -2018,7 +2136,8 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, + } + + struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, +- const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) ++ uint32_t modifiers, const struct hlsl_reg_reservation *reservation, struct hlsl_scope *annotations, ++ const struct vkd3d_shader_location *loc) + { + struct hlsl_buffer *buffer; + +@@ -2026,8 +2145,10 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type + return NULL; + buffer->type = type; + buffer->name = name; ++ buffer->modifiers = modifiers; + if (reservation) + buffer->reservation = *reservation; ++ buffer->annotations = annotations; + buffer->loc = *loc; + list_add_tail(&ctx->buffers, &buffer->entry); + return buffer; +@@ -2130,6 +2251,19 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx 
*ctx, const stru + [HLSL_TYPE_BOOL] = "bool", + }; + ++ static const char *const dimensions[] = ++ { ++ [HLSL_SAMPLER_DIM_1D] = "1D", ++ [HLSL_SAMPLER_DIM_2D] = "2D", ++ [HLSL_SAMPLER_DIM_3D] = "3D", ++ [HLSL_SAMPLER_DIM_CUBE] = "Cube", ++ [HLSL_SAMPLER_DIM_1DARRAY] = "1DArray", ++ [HLSL_SAMPLER_DIM_2DARRAY] = "2DArray", ++ [HLSL_SAMPLER_DIM_2DMS] = "2DMS", ++ [HLSL_SAMPLER_DIM_2DMSARRAY] = "2DMSArray", ++ [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", ++ }; ++ + if (!(string = hlsl_get_string_buffer(ctx))) + return NULL; + +@@ -2183,71 +2317,53 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + vkd3d_string_buffer_printf(string, ""); + return string; + +- case HLSL_CLASS_OBJECT: +- { +- static const char *const dimensions[] = ++ case HLSL_CLASS_TEXTURE: ++ if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + { +- [HLSL_SAMPLER_DIM_1D] = "1D", +- [HLSL_SAMPLER_DIM_2D] = "2D", +- [HLSL_SAMPLER_DIM_3D] = "3D", +- [HLSL_SAMPLER_DIM_CUBE] = "Cube", +- [HLSL_SAMPLER_DIM_1DARRAY] = "1DArray", +- [HLSL_SAMPLER_DIM_2DARRAY] = "2DArray", +- [HLSL_SAMPLER_DIM_2DMS] = "2DMS", +- [HLSL_SAMPLER_DIM_2DMSARRAY] = "2DMSArray", +- [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", +- }; +- +- switch (type->base_type) +- { +- case HLSL_TYPE_TEXTURE: +- if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) +- { +- vkd3d_string_buffer_printf(string, "Texture"); +- return string; +- } +- +- assert(type->e.resource.format->base_type < ARRAY_SIZE(base_types)); +- if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) +- { +- vkd3d_string_buffer_printf(string, "Buffer"); +- } +- else +- { +- assert(type->sampler_dim < ARRAY_SIZE(dimensions)); +- vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); +- } +- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) +- { +- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); +- hlsl_release_string_buffer(ctx, inner_string); +- } +- return string; +- +- case HLSL_TYPE_UAV: 
+- if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) +- vkd3d_string_buffer_printf(string, "RWBuffer"); +- else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) +- vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); +- else +- vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); +- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) +- { +- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); +- hlsl_release_string_buffer(ctx, inner_string); +- } +- return string; ++ vkd3d_string_buffer_printf(string, "Texture"); ++ return string; ++ } + +- default: +- vkd3d_string_buffer_printf(string, ""); +- return string; ++ assert(type->e.resource.format->base_type < ARRAY_SIZE(base_types)); ++ if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) ++ { ++ vkd3d_string_buffer_printf(string, "Buffer"); + } +- } ++ else ++ { ++ assert(type->sampler_dim < ARRAY_SIZE(dimensions)); ++ vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); ++ } ++ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) ++ { ++ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); ++ hlsl_release_string_buffer(ctx, inner_string); ++ } ++ return string; + +- default: +- vkd3d_string_buffer_printf(string, ""); ++ case HLSL_CLASS_UAV: ++ if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) ++ vkd3d_string_buffer_printf(string, "RWBuffer"); ++ else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) ++ vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); ++ else ++ vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); ++ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) ++ { ++ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); ++ hlsl_release_string_buffer(ctx, inner_string); ++ } + return string; ++ ++ case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: ++ case 
HLSL_CLASS_VOID: ++ break; + } ++ ++ vkd3d_string_buffer_printf(string, ""); ++ return string; + } + + struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, +@@ -2611,10 +2727,10 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP2_MUL] = "*", + [HLSL_OP2_NEQUAL] = "!=", + [HLSL_OP2_RSHIFT] = ">>", ++ [HLSL_OP2_SLT] = "slt", + + [HLSL_OP3_CMP] = "cmp", + [HLSL_OP3_DP2ADD] = "dp2add", +- [HLSL_OP3_MOVC] = "movc", + [HLSL_OP3_TERNARY] = "ternary", + }; + +@@ -2791,6 +2907,12 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ + vkd3d_string_buffer_printf(buffer, "]"); + } + ++static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer, ++ const struct hlsl_ir_stateblock_constant *constant) ++{ ++ vkd3d_string_buffer_printf(buffer, "%s", constant->name); ++} ++ + static void dump_ir_switch(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_switch *s) + { + struct hlsl_ir_switch_case *c; +@@ -2879,6 +3001,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + case HLSL_IR_SWIZZLE: + dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); + break; ++ ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); ++ break; + } + } + +@@ -3051,6 +3177,12 @@ static void free_ir_index(struct hlsl_ir_index *index) + vkd3d_free(index); + } + ++static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant) ++{ ++ vkd3d_free(constant->name); ++ vkd3d_free(constant); ++} ++ + void hlsl_free_instr(struct hlsl_ir_node *node) + { + assert(list_empty(&node->uses)); +@@ -3108,6 +3240,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) + case HLSL_IR_SWITCH: + free_ir_switch(hlsl_ir_switch(node)); + break; ++ ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); ++ break; + } + } + +@@ -3273,7 
+3409,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) + {"cs_4_0", VKD3D_SHADER_TYPE_COMPUTE, 4, 0, 0, 0, false}, + {"cs_4_1", VKD3D_SHADER_TYPE_COMPUTE, 4, 1, 0, 0, false}, + {"cs_5_0", VKD3D_SHADER_TYPE_COMPUTE, 5, 0, 0, 0, false}, ++ {"cs_5_1", VKD3D_SHADER_TYPE_COMPUTE, 5, 1, 0, 0, false}, + {"ds_5_0", VKD3D_SHADER_TYPE_DOMAIN, 5, 0, 0, 0, false}, ++ {"ds_5_1", VKD3D_SHADER_TYPE_DOMAIN, 5, 1, 0, 0, false}, + {"fx_2_0", VKD3D_SHADER_TYPE_EFFECT, 2, 0, 0, 0, false}, + {"fx_4_0", VKD3D_SHADER_TYPE_EFFECT, 4, 0, 0, 0, false}, + {"fx_4_1", VKD3D_SHADER_TYPE_EFFECT, 4, 1, 0, 0, false}, +@@ -3281,7 +3419,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) + {"gs_4_0", VKD3D_SHADER_TYPE_GEOMETRY, 4, 0, 0, 0, false}, + {"gs_4_1", VKD3D_SHADER_TYPE_GEOMETRY, 4, 1, 0, 0, false}, + {"gs_5_0", VKD3D_SHADER_TYPE_GEOMETRY, 5, 0, 0, 0, false}, ++ {"gs_5_1", VKD3D_SHADER_TYPE_GEOMETRY, 5, 1, 0, 0, false}, + {"hs_5_0", VKD3D_SHADER_TYPE_HULL, 5, 0, 0, 0, false}, ++ {"hs_5_1", VKD3D_SHADER_TYPE_HULL, 5, 1, 0, 0, false}, + {"ps.1.0", VKD3D_SHADER_TYPE_PIXEL, 1, 0, 0, 0, false}, + {"ps.1.1", VKD3D_SHADER_TYPE_PIXEL, 1, 1, 0, 0, false}, + {"ps.1.2", VKD3D_SHADER_TYPE_PIXEL, 1, 2, 0, 0, false}, +@@ -3309,6 +3449,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) + {"ps_4_0_level_9_3", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 9, 3, false}, + {"ps_4_1", VKD3D_SHADER_TYPE_PIXEL, 4, 1, 0, 0, false}, + {"ps_5_0", VKD3D_SHADER_TYPE_PIXEL, 5, 0, 0, 0, false}, ++ {"ps_5_1", VKD3D_SHADER_TYPE_PIXEL, 5, 1, 0, 0, false}, + {"tx_1_0", VKD3D_SHADER_TYPE_TEXTURE, 1, 0, 0, 0, false}, + {"vs.1.0", VKD3D_SHADER_TYPE_VERTEX, 1, 0, 0, 0, false}, + {"vs.1.1", VKD3D_SHADER_TYPE_VERTEX, 1, 1, 0, 0, false}, +@@ -3330,6 +3471,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) + {"vs_4_0_level_9_3", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 9, 3, false}, + {"vs_4_1", VKD3D_SHADER_TYPE_VERTEX, 4, 1, 0, 0, false}, + 
{"vs_5_0", VKD3D_SHADER_TYPE_VERTEX, 5, 0, 0, 0, false}, ++ {"vs_5_1", VKD3D_SHADER_TYPE_VERTEX, 5, 1, 0, 0, false}, + }; + + for (i = 0; i < ARRAY_SIZE(profiles); ++i) +@@ -3393,10 +3535,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + {"matrix", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, + {"fxgroup", HLSL_CLASS_OBJECT, HLSL_TYPE_EFFECT_GROUP, 1, 1}, + {"pass", HLSL_CLASS_OBJECT, HLSL_TYPE_PASS, 1, 1}, +- {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, +- {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, +- {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, +- {"VERTEXSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, ++ {"pixelshader", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, ++ {"vertexshader", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, + {"RenderTargetView",HLSL_CLASS_OBJECT, HLSL_TYPE_RENDERTARGETVIEW, 1, 1}, + {"DepthStencilView",HLSL_CLASS_OBJECT, HLSL_TYPE_DEPTHSTENCILVIEW, 1, 1}, + }; +@@ -3504,12 +3644,14 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + + for (bt = 0; bt <= HLSL_SAMPLER_DIM_LAST_SAMPLER; ++bt) + { +- type = hlsl_new_type(ctx, sampler_names[bt], HLSL_CLASS_OBJECT, HLSL_TYPE_SAMPLER, 1, 1); ++ type = hlsl_new_simple_type(ctx, sampler_names[bt], HLSL_CLASS_SAMPLER); + type->sampler_dim = bt; + ctx->builtin_types.sampler[bt] = type; + } + +- ctx->builtin_types.Void = hlsl_new_type(ctx, "void", HLSL_CLASS_OBJECT, HLSL_TYPE_VOID, 1, 1); ++ ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); + + for (i = 0; i < ARRAY_SIZE(effect_types); ++i) + { +@@ -3571,27 +3713,46 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil + list_init(&ctx->buffers); + + if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, 
+- hlsl_strdup(ctx, "$Globals"), NULL, &ctx->location))) ++ hlsl_strdup(ctx, "$Globals"), 0, NULL, NULL, &ctx->location))) + return false; + if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, +- hlsl_strdup(ctx, "$Params"), NULL, &ctx->location))) ++ hlsl_strdup(ctx, "$Params"), 0, NULL, NULL, &ctx->location))) + return false; + ctx->cur_buffer = ctx->globals_buffer; + ++ ctx->warn_implicit_truncation = true; ++ + for (i = 0; i < compile_info->option_count; ++i) + { + const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; + +- if (option->name == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER) ++ switch (option->name) + { +- if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) +- ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; +- else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) +- ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; +- } +- else if (option->name == VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY) +- { +- ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; ++ case VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER: ++ if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) ++ ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; ++ else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) ++ ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; ++ break; ++ ++ case VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY: ++ ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; ++ break; ++ ++ case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT: ++ ctx->child_effect = option->value; ++ break; ++ ++ case VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION: ++ ctx->warn_implicit_truncation = option->value; ++ break; ++ ++ case VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS: ++ ctx->include_empty_buffers = option->value; ++ break; 
++ ++ default: ++ break; + } + } + +@@ -3615,6 +3776,21 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) + + rb_destroy(&ctx->functions, free_function_rb, NULL); + ++ /* State blocks must be free before the variables, because they contain instructions that may ++ * refer to them. */ ++ LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ for (i = 0; i < var->state_block_count; ++i) ++ hlsl_free_state_block(var->state_blocks[i]); ++ vkd3d_free(var->state_blocks); ++ var->state_blocks = NULL; ++ var->state_block_count = 0; ++ var->state_block_capacity = 0; ++ } ++ } ++ + LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) +@@ -3638,6 +3814,7 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) + int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { ++ enum vkd3d_shader_target_type target_type = compile_info->target_type; + const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; + struct hlsl_ir_function_decl *decl, *entry_func = NULL; + const struct hlsl_profile_info *profile; +@@ -3659,25 +3836,25 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + +- if (compile_info->target_type != VKD3D_SHADER_TARGET_FX && profile->type == VKD3D_SHADER_TYPE_EFFECT) ++ if (target_type != VKD3D_SHADER_TARGET_FX && profile->type == VKD3D_SHADER_TYPE_EFFECT) + { + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The '%s' target profile is only compatible with the 'fx' target type.", profile->name); + return VKD3D_ERROR_INVALID_ARGUMENT; + } +- else if 
(compile_info->target_type == VKD3D_SHADER_TARGET_D3D_BYTECODE && profile->major_version > 3) ++ else if (target_type == VKD3D_SHADER_TARGET_D3D_BYTECODE && profile->major_version > 3) + { + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The '%s' target profile is incompatible with the 'd3dbc' target type.", profile->name); + return VKD3D_ERROR_INVALID_ARGUMENT; + } +- else if (compile_info->target_type == VKD3D_SHADER_TARGET_DXBC_TPF && profile->major_version < 4) ++ else if (target_type == VKD3D_SHADER_TARGET_DXBC_TPF && profile->major_version < 4) + { + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The '%s' target profile is incompatible with the 'dxbc-tpf' target type.", profile->name); + return VKD3D_ERROR_INVALID_ARGUMENT; + } +- else if (compile_info->target_type == VKD3D_SHADER_TARGET_FX && profile->type != VKD3D_SHADER_TYPE_EFFECT) ++ else if (target_type == VKD3D_SHADER_TARGET_FX && profile->type != VKD3D_SHADER_TYPE_EFFECT) + { + vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The '%s' target profile is incompatible with the 'fx' target type.", profile->name); +@@ -3741,8 +3918,40 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d + return VKD3D_ERROR_INVALID_SHADER; + } + +- ret = hlsl_emit_bytecode(&ctx, entry_func, compile_info->target_type, out); ++ if (target_type == VKD3D_SHADER_TARGET_SPIRV_BINARY ++ || target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT ++ || target_type == VKD3D_SHADER_TARGET_D3D_ASM) ++ { ++ struct vkd3d_shader_compile_info info = *compile_info; ++ struct vkd3d_shader_parser *parser; ++ ++ if (profile->major_version < 4) ++ { ++ if ((ret = hlsl_emit_bytecode(&ctx, entry_func, VKD3D_SHADER_TARGET_D3D_BYTECODE, &info.source)) < 0) ++ goto done; ++ info.source_type = VKD3D_SHADER_SOURCE_D3D_BYTECODE; ++ ret = vkd3d_shader_sm1_parser_create(&info, message_context, &parser); 
++ } ++ else ++ { ++ if ((ret = hlsl_emit_bytecode(&ctx, entry_func, VKD3D_SHADER_TARGET_DXBC_TPF, &info.source)) < 0) ++ goto done; ++ info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; ++ ret = vkd3d_shader_sm4_parser_create(&info, message_context, &parser); ++ } ++ if (ret >= 0) ++ { ++ ret = vkd3d_shader_parser_compile(parser, &info, out, message_context); ++ vkd3d_shader_parser_destroy(parser); ++ } ++ vkd3d_shader_free_shader_code(&info.source); ++ } ++ else ++ { ++ ret = hlsl_emit_bytecode(&ctx, entry_func, target_type, out); ++ } + ++done: + hlsl_ctx_cleanup(&ctx); + return ret; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index df0a53b20de..7a8fe4de437 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -79,6 +79,11 @@ enum hlsl_type_class + HLSL_CLASS_STRUCT, + HLSL_CLASS_ARRAY, + HLSL_CLASS_OBJECT, ++ HLSL_CLASS_SAMPLER, ++ HLSL_CLASS_STRING, ++ HLSL_CLASS_TEXTURE, ++ HLSL_CLASS_UAV, ++ HLSL_CLASS_VOID, + }; + + enum hlsl_base_type +@@ -90,9 +95,6 @@ enum hlsl_base_type + HLSL_TYPE_UINT, + HLSL_TYPE_BOOL, + HLSL_TYPE_LAST_SCALAR = HLSL_TYPE_BOOL, +- HLSL_TYPE_SAMPLER, +- HLSL_TYPE_TEXTURE, +- HLSL_TYPE_UAV, + HLSL_TYPE_PIXELSHADER, + HLSL_TYPE_VERTEXSHADER, + HLSL_TYPE_PASS, +@@ -100,8 +102,6 @@ enum hlsl_base_type + HLSL_TYPE_DEPTHSTENCILVIEW, + HLSL_TYPE_TECHNIQUE, + HLSL_TYPE_EFFECT_GROUP, +- HLSL_TYPE_STRING, +- HLSL_TYPE_VOID, + }; + + enum hlsl_sampler_dim +@@ -150,10 +150,10 @@ struct hlsl_type + * Otherwise, base_type is not used. */ + enum hlsl_base_type base_type; + +- /* If base_type is HLSL_TYPE_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. +- * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim can be any value of the enum except ++ /* If class is HLSL_CLASS_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. 
++ * If class is HLSL_CLASS_TEXTURE, then sampler_dim can be any value of the enum except + * HLSL_SAMPLER_DIM_GENERIC and HLSL_SAMPLER_DIM_COMPARISON. +- * If base_type is HLSL_TYPE_UAV, then sampler_dim must be one of HLSL_SAMPLER_DIM_1D, ++ * If class is HLSL_CLASS_UAV, then sampler_dim must be one of HLSL_SAMPLER_DIM_1D, + * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, HLSL_SAMPLER_DIM_2DARRAY, + * HLSL_SAMPLER_DIM_BUFFER, or HLSL_SAMPLER_DIM_STRUCTURED_BUFFER. + * Otherwise, sampler_dim is not used */ +@@ -171,11 +171,7 @@ struct hlsl_type + * If type is HLSL_CLASS_MATRIX, then dimx is the number of columns, and dimy the number of rows. + * If type is HLSL_CLASS_ARRAY, then dimx and dimy have the same value as in the type of the array elements. + * If type is HLSL_CLASS_STRUCT, then dimx is the sum of (dimx * dimy) of every component, and dimy = 1. +- * If type is HLSL_CLASS_OBJECT, dimx and dimy depend on the base_type: +- * If base_type is HLSL_TYPE_SAMPLER, then both dimx = 1 and dimy = 1. +- * If base_type is HLSL_TYPE_TEXTURE, then dimx = 4 and dimy = 1. +- * If base_type is HLSL_TYPE_UAV, then dimx is the dimx of e.resource_format, and dimy = 1. +- * Otherwise both dimx = 1 and dimy = 1. */ ++ */ + unsigned int dimx; + unsigned int dimy; + /* Sample count for HLSL_SAMPLER_DIM_2DMS or HLSL_SAMPLER_DIM_2DMSARRAY. */ +@@ -196,8 +192,8 @@ struct hlsl_type + /* Array length, or HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT if it is not known yet at parse time. */ + unsigned int elements_count; + } array; +- /* Additional information if the base_type is HLSL_TYPE_TEXTURE or +- * HLSL_TYPE_UAV. */ ++ /* Additional information if the class is HLSL_CLASS_TEXTURE or ++ * HLSL_CLASS_UAV. */ + struct + { + /* Format of the data contained within the type. */ +@@ -298,6 +294,7 @@ enum hlsl_ir_node_type + HLSL_IR_STORE, + HLSL_IR_SWIZZLE, + HLSL_IR_SWITCH, ++ HLSL_IR_STATEBLOCK_CONSTANT, + }; + + /* Common data for every type of IR instruction node. 
*/ +@@ -374,6 +371,8 @@ struct hlsl_attribute + #define HLSL_STORAGE_CENTROID 0x00004000 + #define HLSL_STORAGE_NOPERSPECTIVE 0x00008000 + #define HLSL_STORAGE_LINEAR 0x00010000 ++#define HLSL_MODIFIER_SINGLE 0x00020000 ++#define HLSL_MODIFIER_EXPORT 0x00040000 + + #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ + HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ +@@ -421,6 +420,14 @@ struct hlsl_ir_var + /* Scope that contains annotations for this variable. */ + struct hlsl_scope *annotations; + ++ /* A dynamic array containing the state block on the variable's declaration, if any. ++ * An array variable may contain multiple state blocks. ++ * A technique pass will always contain one. ++ * These are only really used for effect profiles. */ ++ struct hlsl_state_block **state_blocks; ++ unsigned int state_block_count; ++ size_t state_block_capacity; ++ + /* Indexes of the IR instructions where the variable is first written and last read (liveness + * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 + * means function entry. */ +@@ -456,6 +463,38 @@ struct hlsl_ir_var + uint32_t is_separated_resource : 1; + }; + ++/* This struct is used to represent assignments in state block entries: ++ * name = {args[0], args[1], ...}; ++ * - or - ++ * name = args[0] ++ * - or - ++ * name[lhs_index] = args[0] ++ * - or - ++ * name[lhs_index] = {args[0], args[1], ...}; ++ */ ++struct hlsl_state_block_entry ++{ ++ /* For assignments, the name in the lhs. */ ++ char *name; ++ ++ /* Whether the lhs in the assignment is indexed and, in that case, its index. */ ++ bool lhs_has_index; ++ unsigned int lhs_index; ++ ++ /* Instructions present in the rhs. */ ++ struct hlsl_block *instrs; ++ ++ /* For assignments, arguments of the rhs initializer. 
*/ ++ struct hlsl_ir_node **args; ++ unsigned int args_count; ++}; ++ ++struct hlsl_state_block ++{ ++ struct hlsl_state_block_entry **entries; ++ size_t count, capacity; ++}; ++ + /* Sized array of variables representing a function's parameters. */ + struct hlsl_func_parameters + { +@@ -593,18 +632,15 @@ enum hlsl_ir_expr_op + HLSL_OP2_MUL, + HLSL_OP2_NEQUAL, + HLSL_OP2_RSHIFT, ++ /* SLT(a, b) retrieves 1.0 if (a < b), else 0.0. Only used for SM1-SM3 target vertex shaders. */ ++ HLSL_OP2_SLT, + + /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, + * then adds c. */ + HLSL_OP3_DP2ADD, +- /* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise. +- * TERNARY(a, b, c) returns c if a == 0 and b otherwise. +- * They differ for floating point numbers, because +- * -0.0 == 0.0, but it is not bitwise zero. CMP(a, b, c) returns b +- if a >= 0, and c otherwise. It's used only for SM1-SM3 targets, while +- SM4+ is using MOVC in such cases. */ ++ /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean. ++ * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ + HLSL_OP3_CMP, +- HLSL_OP3_MOVC, + HLSL_OP3_TERNARY, + }; + +@@ -750,6 +786,14 @@ struct hlsl_ir_constant + struct hlsl_reg reg; + }; + ++/* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, ++ * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ ++struct hlsl_ir_stateblock_constant ++{ ++ struct hlsl_ir_node node; ++ char *name; ++}; ++ + struct hlsl_scope + { + /* Item entry for hlsl_ctx.scopes. */ +@@ -798,10 +842,13 @@ struct hlsl_buffer + struct vkd3d_shader_location loc; + enum hlsl_buffer_type type; + const char *name; ++ uint32_t modifiers; + /* Register reserved for this buffer, if any. + * If provided, it should be of type 'b' if type is HLSL_BUFFER_CONSTANT and 't' if type is + * HLSL_BUFFER_TEXTURE. 
*/ + struct hlsl_reg_reservation reservation; ++ /* Scope that contains annotations for this buffer. */ ++ struct hlsl_scope *annotations; + /* Item entry for hlsl_ctx.buffers */ + struct list entry; + +@@ -920,8 +967,21 @@ struct hlsl_ctx + uint32_t found_numthreads : 1; + + bool semantic_compat_mapping; ++ bool child_effect; ++ bool include_empty_buffers; ++ bool warn_implicit_truncation; + }; + ++static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) ++{ ++ return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); ++} ++ ++static inline bool hlsl_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) ++{ ++ return !hlsl_version_ge(ctx, major, minor); ++} ++ + struct hlsl_resource_load_params + { + struct hlsl_type *format; +@@ -1009,6 +1069,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n + return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); + } + ++static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) ++{ ++ assert(node->type == HLSL_IR_STATEBLOCK_CONSTANT); ++ return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); ++} ++ + static inline void hlsl_block_init(struct hlsl_block *block) + { + list_init(&block->instrs); +@@ -1201,6 +1267,7 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new); + void hlsl_free_attribute(struct hlsl_attribute *attr); + void hlsl_free_instr(struct hlsl_ir_node *node); + void hlsl_free_instr_list(struct list *list); ++void hlsl_free_state_block(struct hlsl_state_block *state_block); + void hlsl_free_type(struct hlsl_type *type); + void hlsl_free_var(struct hlsl_ir_var *decl); + +@@ -1222,7 +1289,8 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp + struct hlsl_ir_node *arg2); + struct hlsl_ir_node 
*hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); + struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, +- const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); ++ uint32_t modifiers, const struct hlsl_reg_reservation *reservation, struct hlsl_scope *annotations, ++ const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, + const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, +@@ -1243,6 +1311,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond + struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, + enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, ++ struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); + + void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); + +@@ -1279,6 +1349,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, + struct hlsl_struct_field *fields, size_t field_count); + struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, + struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, ++ struct vkd3d_shader_location *loc); + struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, + struct hlsl_type *type, const struct vkd3d_shader_location *loc); + struct hlsl_ir_var 
*hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, +@@ -1356,6 +1428,8 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), + struct hlsl_block *block, void *context); + ++D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); ++D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); + bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); + bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +index 558506db108..88b917eff11 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +@@ -76,6 +76,7 @@ case {return KW_CASE; } + cbuffer {return KW_CBUFFER; } + centroid {return KW_CENTROID; } + column_major {return KW_COLUMN_MAJOR; } ++ComputeShader {return KW_COMPUTESHADER; } + compile {return KW_COMPILE; } + const {return KW_CONST; } + continue {return KW_CONTINUE; } +@@ -83,15 +84,18 @@ DepthStencilState {return KW_DEPTHSTENCILSTATE; } + DepthStencilView {return KW_DEPTHSTENCILVIEW; } + default {return KW_DEFAULT; } + discard {return KW_DISCARD; } ++DomainShader {return KW_DOMAINSHADER; } + do {return KW_DO; } + double {return KW_DOUBLE; } + else {return KW_ELSE; } ++export {return KW_EXPORT; } + extern {return KW_EXTERN; } + false {return KW_FALSE; } + for {return KW_FOR; } + fxgroup {return KW_FXGROUP; } + GeometryShader {return KW_GEOMETRYSHADER; } + groupshared {return KW_GROUPSHARED; } ++HullShader {return KW_HULLSHADER; } + if {return KW_IF; } + in {return KW_IN; } + inline {return KW_INLINE; } +@@ -105,7 +109,7 @@ out {return KW_OUT; } + packoffset {return KW_PACKOFFSET; } + pass 
{return KW_PASS; } + PixelShader {return KW_PIXELSHADER; } +-precise {return KW_PRECISE; } ++pixelshader {return KW_PIXELSHADER; } + RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } + RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } + RasterizerOrderedTexture1D {return KW_RASTERIZERORDEREDTEXTURE1D; } +@@ -163,6 +167,7 @@ typedef {return KW_TYPEDEF; } + uniform {return KW_UNIFORM; } + vector {return KW_VECTOR; } + VertexShader {return KW_VERTEXSHADER; } ++vertexshader {return KW_VERTEXSHADER; } + void {return KW_VOID; } + volatile {return KW_VOLATILE; } + while {return KW_WHILE; } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index cd05fd008a6..0c196b77595 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -77,6 +77,10 @@ struct parse_variable_def + struct hlsl_type *basic_type; + uint32_t modifiers; + struct vkd3d_shader_location modifiers_loc; ++ ++ struct hlsl_state_block **state_blocks; ++ unsigned int state_block_count; ++ size_t state_block_capacity; + }; + + struct parse_function +@@ -114,6 +118,12 @@ struct parse_attribute_list + const struct hlsl_attribute **attrs; + }; + ++struct state_block_index ++{ ++ bool has_index; ++ unsigned int index; ++}; ++ + } + + %code provides +@@ -413,7 +423,7 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct + return NULL; + } + +- if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy) ++ if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy && ctx->warn_implicit_truncation) + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", + src_type->class == HLSL_CLASS_VECTOR ? 
"vector" : "matrix"); + +@@ -438,8 +448,9 @@ static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t + + static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) + { +- struct hlsl_ir_node *condition, *not, *iff, *jump; ++ struct hlsl_ir_node *condition, *cast, *not, *iff, *jump; + struct hlsl_block then_block; ++ struct hlsl_type *bool_type; + + /* E.g. "for (i = 0; ; ++i)". */ + if (list_empty(&cond_block->instrs)) +@@ -449,7 +460,12 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co + + check_condition_type(ctx, condition); + +- if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) ++ bool_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); ++ if (!(cast = hlsl_new_cast(ctx, condition, bool_type, &condition->loc))) ++ return false; ++ hlsl_block_add_instr(cond_block, cast); ++ ++ if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, cast, &condition->loc))) + return false; + hlsl_block_add_instr(cond_block, not); + +@@ -817,8 +833,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str + const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; + struct hlsl_ir_node *return_index, *cast; + +- if (expr_type->class == HLSL_CLASS_OBJECT +- && (expr_type->base_type == HLSL_TYPE_TEXTURE || expr_type->base_type == HLSL_TYPE_UAV) ++ if ((expr_type->class == HLSL_CLASS_TEXTURE || expr_type->class == HLSL_CLASS_UAV) + && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); +@@ -925,24 +940,10 @@ static void free_parse_variable_def(struct parse_variable_def *v) + vkd3d_free(v->arrays.sizes); + vkd3d_free(v->name); + hlsl_cleanup_semantic(&v->semantic); ++ assert(!v->state_blocks); + vkd3d_free(v); + } + +-static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) +-{ +- return ctx->profile->major_version == 5 && 
ctx->profile->minor_version >= 1; +-} +- +-static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +-{ +- return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); +-} +- +-static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +-{ +- return !shader_profile_version_ge(ctx, major, minor); +-} +- + static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + struct hlsl_type *type, uint32_t modifiers, struct list *defs) + { +@@ -965,7 +966,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + + field->type = type; + +- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) ++ if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) + { + for (k = 0; k < v->arrays.count; ++k) + unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); +@@ -1115,7 +1116,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters + } + + static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *annotations, +- const struct vkd3d_shader_location *loc) ++ struct hlsl_state_block *state_block, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_var *var; + struct hlsl_type *type; +@@ -1125,6 +1126,11 @@ static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope * + return false; + var->annotations = annotations; + ++ var->state_blocks = hlsl_alloc(ctx, sizeof(*var->state_blocks)); ++ var->state_blocks[0] = state_block; ++ var->state_block_count = 1; ++ var->state_block_capacity = 1; ++ + if (!hlsl_add_var(ctx, var, false)) + { + struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); +@@ -1210,7 +1216,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const + struct hlsl_reg_reservation reservation = {0}; + char 
*endptr; + +- if (shader_profile_version_lt(ctx, 4, 0)) ++ if (hlsl_version_lt(ctx, 4, 0)) + return reservation; + + reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); +@@ -1293,6 +1299,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_STORE: + case HLSL_IR_SWITCH: ++ case HLSL_IR_STATEBLOCK_CONSTANT: + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); + } +@@ -1933,10 +1940,9 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + return NULL; + + resource_type = hlsl_deref_get_type(ctx, &resource_deref); +- assert(resource_type->class == HLSL_CLASS_OBJECT); +- assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV); ++ assert(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); + +- if (resource_type->base_type != HLSL_TYPE_UAV) ++ if (resource_type->class != HLSL_CLASS_UAV) + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Read-only resources cannot be stored to."); + +@@ -2085,24 +2091,23 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i + } + } + +-static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_struct) ++static bool type_has_object_components(const struct hlsl_type *type) + { +- if (type->class == HLSL_CLASS_OBJECT) +- return !must_be_in_struct; + if (type->class == HLSL_CLASS_ARRAY) +- return type_has_object_components(type->e.array.type, must_be_in_struct); ++ return type_has_object_components(type->e.array.type); + + if (type->class == HLSL_CLASS_STRUCT) + { +- unsigned int i; +- +- for (i = 0; i < type->e.record.field_count; ++i) ++ for (unsigned int i = 0; i < type->e.record.field_count; ++i) + { +- if (type_has_object_components(type->e.record.fields[i].type, false)) ++ if 
(type_has_object_components(type->e.record.fields[i].type)) + return true; + } ++ ++ return false; + } +- return false; ++ ++ return !hlsl_is_numeric_type(type); + } + + static bool type_has_numeric_components(struct hlsl_type *type) +@@ -2140,6 +2145,18 @@ static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int mo + } + } + ++static void check_invalid_object_fields(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) ++{ ++ const struct hlsl_type *type = var->data_type; ++ ++ while (type->class == HLSL_CLASS_ARRAY) ++ type = type->e.array.type; ++ ++ if (type->class == HLSL_CLASS_STRUCT && type_has_object_components(type)) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Target profile doesn't support objects as struct members in uniform variables."); ++} ++ + static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + { + struct hlsl_type *basic_type = v->basic_type; +@@ -2160,7 +2177,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + + type = basic_type; + +- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) ++ if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) + { + for (i = 0; i < v->arrays.count; ++i) + unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); +@@ -2265,12 +2282,8 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + if (!(modifiers & HLSL_STORAGE_STATIC)) + var->storage_modifiers |= HLSL_STORAGE_UNIFORM; + +- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && +- type_has_object_components(var->data_type, true)) +- { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Target profile doesn't support objects as struct members in uniform variables."); +- } ++ if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ check_invalid_object_fields(ctx, var); + + if ((func = 
hlsl_get_first_func_decl(ctx, var->name))) + { +@@ -2306,7 +2319,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + } + + if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) +- && type_has_object_components(var->data_type, false)) ++ && type_has_object_components(var->data_type)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Static variables cannot have both numeric and resource components."); +@@ -2349,8 +2362,25 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + free_parse_variable_def(v); + continue; + } ++ + type = var->data_type; + ++ var->state_blocks = v->state_blocks; ++ var->state_block_count = v->state_block_count; ++ var->state_block_capacity = v->state_block_capacity; ++ v->state_block_count = 0; ++ v->state_block_capacity = 0; ++ v->state_blocks = NULL; ++ ++ if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Expected %u state blocks, but got %u.", ++ hlsl_type_component_count(type), var->state_block_count); ++ free_parse_variable_def(v); ++ continue; ++ } ++ + if (v->initializer.args_count) + { + if (v->initializer.braces) +@@ -2394,7 +2424,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + + /* Initialize statics to zero by default. 
*/ + +- if (type_has_object_components(var->data_type, false)) ++ if (type_has_object_components(var->data_type)) + { + free_parse_variable_def(v); + continue; +@@ -2650,12 +2680,14 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, + static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { ++ enum hlsl_base_type base_type; + struct hlsl_type *type; + + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + +- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); ++ base_type = type->base_type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; ++ type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); + + return convert_args(ctx, params, type, loc); + } +@@ -2715,81 +2747,62 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, + return write_acos_or_asin(ctx, params, loc, false); + } + +-static bool intrinsic_all(struct hlsl_ctx *ctx, +- const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++/* Find the type corresponding to the given source type, with the same ++ * dimensions but a different base type. 
*/ ++static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, ++ const struct hlsl_type *type, enum hlsl_base_type base_type) ++{ ++ return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); ++} ++ ++static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params, ++ struct hlsl_ir_node *arg, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; ++ struct hlsl_ir_node *res, *load; + unsigned int i, count; + +- if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, one); ++ count = hlsl_type_component_count(arg->data_type); + +- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) ++ if (!(res = hlsl_add_load_component(ctx, params->instrs, arg, 0, loc))) + return false; +- hlsl_block_add_instr(params->instrs, zero); +- +- mul = one; + +- count = hlsl_type_component_count(arg->data_type); +- for (i = 0; i < count; ++i) ++ for (i = 1; i < count; ++i) + { + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) + return false; + +- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) +- return false; ++ if (!(res = hlsl_new_binary_expr(ctx, op, res, load))) ++ return NULL; ++ hlsl_block_add_instr(params->instrs, res); + } + +- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); ++ return true; + } + +-static bool intrinsic_any(struct hlsl_ctx *ctx, ++static bool intrinsic_all(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; +- unsigned int i, count; ++ struct hlsl_ir_node *arg = params->args[0], *cast; ++ struct hlsl_type *bool_type; + +- if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != 
HLSL_CLASS_SCALAR) +- { +- hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); ++ bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); ++ if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) + return false; +- } +- +- if (arg->data_type->base_type == HLSL_TYPE_FLOAT) +- { +- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, zero); +- +- if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) +- return false; + +- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); +- } +- else if (arg->data_type->base_type == HLSL_TYPE_BOOL) +- { +- if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, bfalse); +- +- or = bfalse; +- +- count = hlsl_type_component_count(arg->data_type); +- for (i = 0; i < count; ++i) +- { +- if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) +- return false; ++ return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_AND, loc); ++} + +- if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) +- return false; +- } ++static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params, ++ const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg = params->args[0], *cast; ++ struct hlsl_type *bool_type; + +- return true; +- } ++ bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); ++ if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) ++ return false; + +- hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); +- return false; ++ return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_OR, loc); + } + + static bool intrinsic_asin(struct hlsl_ctx *ctx, +@@ -2857,20 +2870,20 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, + type->name, type->name, type->name); + if (ret < 0) 
+ { +- vkd3d_string_buffer_cleanup(buf); ++ hlsl_release_string_buffer(ctx, buf); + return false; + } + + ret = vkd3d_string_buffer_printf(buf, body_template, type->name); + if (ret < 0) + { +- vkd3d_string_buffer_cleanup(buf); ++ hlsl_release_string_buffer(ctx, buf); + return false; + } + + func = hlsl_compile_internal_function(ctx, + atan2_mode ? atan2_name : atan_name, buf->buffer); +- vkd3d_string_buffer_cleanup(buf); ++ hlsl_release_string_buffer(ctx, buf); + if (!func) + return false; + +@@ -2890,15 +2903,6 @@ static bool intrinsic_atan2(struct hlsl_ctx *ctx, + return write_atan_or_atan2(ctx, params, loc, true); + } + +- +-/* Find the type corresponding to the given source type, with the same +- * dimensions but a different base type. */ +-static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, +- const struct hlsl_type *type, enum hlsl_base_type base_type) +-{ +- return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); +-} +- + static bool intrinsic_asfloat(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3022,6 +3026,46 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_COS, arg, loc); + } + ++static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool sinh_mode) ++{ ++ struct hlsl_ir_function_decl *func; ++ struct hlsl_ir_node *arg; ++ const char *fn_name, *type_name; ++ char *body; ++ ++ static const char template[] = ++ "%s %s(%s x)\n" ++ "{\n" ++ " return (exp(x) %s exp(-x)) / 2;\n" ++ "}\n"; ++ static const char fn_name_sinh[] = "sinh"; ++ static const char fn_name_cosh[] = "cosh"; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ type_name = arg->data_type->name; ++ fn_name = sinh_mode ? 
fn_name_sinh : fn_name_cosh; ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, ++ type_name, fn_name, type_name, sinh_mode ? "-" : "+"))) ++ return false; ++ ++ func = hlsl_compile_internal_function(ctx, fn_name, body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return add_user_call(ctx, func, params, loc); ++} ++ ++static bool intrinsic_cosh(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return write_cosh_or_sinh(ctx, params, loc, false); ++} ++ + static bool intrinsic_cross(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3155,6 +3199,94 @@ static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); + } + ++static bool intrinsic_determinant(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ static const char determinant2x2[] = ++ "%s determinant(%s2x2 m)\n" ++ "{\n" ++ " return m._11 * m._22 - m._12 * m._21;\n" ++ "}"; ++ static const char determinant3x3[] = ++ "%s determinant(%s3x3 m)\n" ++ "{\n" ++ " %s2x2 m1 = { m._22, m._23, m._32, m._33 };\n" ++ " %s2x2 m2 = { m._21, m._23, m._31, m._33 };\n" ++ " %s2x2 m3 = { m._21, m._22, m._31, m._32 };\n" ++ " %s3 v1 = { m._11, -m._12, m._13 };\n" ++ " %s3 v2 = { determinant(m1), determinant(m2), determinant(m3) };\n" ++ " return dot(v1, v2);\n" ++ "}"; ++ static const char determinant4x4[] = ++ "%s determinant(%s4x4 m)\n" ++ "{\n" ++ " %s3x3 m1 = { m._22, m._23, m._24, m._32, m._33, m._34, m._42, m._43, m._44 };\n" ++ " %s3x3 m2 = { m._21, m._23, m._24, m._31, m._33, m._34, m._41, m._43, m._44 };\n" ++ " %s3x3 m3 = { m._21, m._22, m._24, m._31, m._32, m._34, m._41, m._42, m._44 };\n" ++ " %s3x3 m4 = { m._21, m._22, m._23, m._31, m._32, m._33, m._41, m._42, m._43 };\n" ++ " %s4 v1 = { m._11, -m._12, m._13, -m._14 };\n" ++ " %s4 v2 = { 
determinant(m1), determinant(m2), determinant(m3), determinant(m4) };\n" ++ " return dot(v1, v2);\n" ++ "}"; ++ static const char *templates[] = ++ { ++ [2] = determinant2x2, ++ [3] = determinant3x3, ++ [4] = determinant4x4, ++ }; ++ ++ struct hlsl_ir_node *arg = params->args[0]; ++ const struct hlsl_type *type = arg->data_type; ++ struct hlsl_ir_function_decl *func; ++ const char *typename, *template; ++ unsigned int dim; ++ char *body; ++ ++ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_MATRIX) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); ++ return false; ++ } ++ ++ dim = min(type->dimx, type->dimy); ++ if (dim == 1) ++ { ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) ++ return false; ++ return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); ++ } ++ ++ typename = type->base_type == HLSL_TYPE_HALF ? "half" : "float"; ++ template = templates[dim]; ++ ++ switch (dim) ++ { ++ case 2: ++ body = hlsl_sprintf_alloc(ctx, template, typename, typename); ++ break; ++ case 3: ++ body = hlsl_sprintf_alloc(ctx, template, typename, typename, typename, ++ typename, typename, typename, typename); ++ break; ++ case 4: ++ body = hlsl_sprintf_alloc(ctx, template, typename, typename, typename, ++ typename, typename, typename, typename, typename); ++ break; ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ if (!body) ++ return false; ++ ++ func = hlsl_compile_internal_function(ctx, "determinant", body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return add_user_call(ctx, func, params, loc); ++} ++ + static bool intrinsic_distance(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3646,6 +3778,59 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, i, neg, loc); + } + ++static bool intrinsic_refract(struct hlsl_ctx *ctx, ++ const struct 
parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_type *r_type = params->args[0]->data_type; ++ struct hlsl_type *n_type = params->args[1]->data_type; ++ struct hlsl_type *i_type = params->args[2]->data_type; ++ struct hlsl_type *res_type, *idx_type, *scal_type; ++ struct parse_initializer mut_params; ++ struct hlsl_ir_function_decl *func; ++ enum hlsl_base_type base; ++ char *body; ++ ++ static const char template[] = ++ "%s refract(%s r, %s n, %s i)\n" ++ "{\n" ++ " %s d, t;\n" ++ " d = dot(r, n);\n" ++ " t = 1 - i.x * i.x * (1 - d * d);\n" ++ " return t >= 0.0 ? i.x * r - (i.x * d + sqrt(t)) * n : 0;\n" ++ "}"; ++ ++ if (r_type->class == HLSL_CLASS_MATRIX ++ || n_type->class == HLSL_CLASS_MATRIX ++ || i_type->class == HLSL_CLASS_MATRIX) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Matrix arguments are not supported."); ++ return false; ++ } ++ ++ assert(params->args_count == 3); ++ mut_params = *params; ++ mut_params.args_count = 2; ++ if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) ++ return false; ++ ++ base = expr_common_base_type(res_type->base_type, i_type->base_type); ++ base = base == HLSL_TYPE_HALF ? 
HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; ++ res_type = convert_numeric_type(ctx, res_type, base); ++ idx_type = convert_numeric_type(ctx, i_type, base); ++ scal_type = hlsl_get_scalar_type(ctx, base); ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, res_type->name, res_type->name, ++ res_type->name, idx_type->name, scal_type->name))) ++ return false; ++ ++ func = hlsl_compile_internal_function(ctx, "refract", body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return add_user_call(ctx, func, params, loc); ++} ++ + static bool intrinsic_round(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3726,6 +3911,12 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); + } + ++static bool intrinsic_sinh(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return write_cosh_or_sinh(ctx, params, loc, true); ++} ++ + /* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ + static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +@@ -3798,6 +3989,39 @@ static bool intrinsic_tan(struct hlsl_ctx *ctx, + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, sin, cos, loc); + } + ++static bool intrinsic_tanh(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_function_decl *func; ++ struct hlsl_ir_node *arg; ++ struct hlsl_type *type; ++ char *body; ++ ++ static const char template[] = ++ "%s tanh(%s x)\n" ++ "{\n" ++ " %s exp_pos, exp_neg;\n" ++ " exp_pos = exp(x);\n" ++ " exp_neg = exp(-x);\n" ++ " return (exp_pos - exp_neg) / (exp_pos + exp_neg);\n" ++ "}\n"; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ type = 
arg->data_type; ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, ++ type->name, type->name, type->name))) ++ return false; ++ ++ func = hlsl_compile_internal_function(ctx, "tanh", body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return add_user_call(ctx, func, params, loc); ++} ++ + static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) + { +@@ -3818,7 +4042,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + } + + sampler_type = params->args[0]->data_type; +- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER ++ if (sampler_type->class != HLSL_CLASS_SAMPLER + || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) + { + struct vkd3d_string_buffer *string; +@@ -3866,7 +4090,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + return false; + } + +- if (shader_profile_version_ge(ctx, 4, 0)) ++ if (hlsl_version_ge(ctx, 4, 0)) + { + unsigned int count = hlsl_sampler_dim_count(dim); + struct hlsl_ir_node *divisor; +@@ -3913,7 +4137,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + return false; + + initialize_var_components(ctx, params->instrs, var, &idx, coords); +- if (shader_profile_version_ge(ctx, 4, 0)) ++ if (hlsl_version_ge(ctx, 4, 0)) + { + if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) + return false; +@@ -4099,7 +4323,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) + return false; + +- if (shader_profile_version_ge(ctx, 4, 0)) ++ if (hlsl_version_ge(ctx, 4, 0)) + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); + + return true; +@@ -4130,6 +4354,7 @@ intrinsic_functions[] = + 
{"clamp", 3, true, intrinsic_clamp}, + {"clip", 1, true, intrinsic_clip}, + {"cos", 1, true, intrinsic_cos}, ++ {"cosh", 1, true, intrinsic_cosh}, + {"cross", 2, true, intrinsic_cross}, + {"ddx", 1, true, intrinsic_ddx}, + {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, +@@ -4138,6 +4363,7 @@ intrinsic_functions[] = + {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, + {"ddy_fine", 1, true, intrinsic_ddy_fine}, + {"degrees", 1, true, intrinsic_degrees}, ++ {"determinant", 1, true, intrinsic_determinant}, + {"distance", 2, true, intrinsic_distance}, + {"dot", 2, true, intrinsic_dot}, + {"exp", 1, true, intrinsic_exp}, +@@ -4160,15 +4386,18 @@ intrinsic_functions[] = + {"pow", 2, true, intrinsic_pow}, + {"radians", 1, true, intrinsic_radians}, + {"reflect", 2, true, intrinsic_reflect}, ++ {"refract", 3, true, intrinsic_refract}, + {"round", 1, true, intrinsic_round}, + {"rsqrt", 1, true, intrinsic_rsqrt}, + {"saturate", 1, true, intrinsic_saturate}, + {"sign", 1, true, intrinsic_sign}, + {"sin", 1, true, intrinsic_sin}, ++ {"sinh", 1, true, intrinsic_sinh}, + {"smoothstep", 3, true, intrinsic_smoothstep}, + {"sqrt", 1, true, intrinsic_sqrt}, + {"step", 2, true, intrinsic_step}, + {"tan", 1, true, intrinsic_tan}, ++ {"tanh", 1, true, intrinsic_tanh}, + {"tex1D", -1, false, intrinsic_tex1D}, + {"tex2D", -1, false, intrinsic_tex2D}, + {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, +@@ -4263,22 +4492,7 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type + return NULL; + + for (i = 0; i < params->args_count; ++i) +- { +- struct hlsl_ir_node *arg = params->args[i]; +- +- if (arg->data_type->class == HLSL_CLASS_OBJECT) +- { +- struct vkd3d_string_buffer *string; +- +- if ((string = hlsl_type_to_string(ctx, arg->data_type))) +- hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Invalid type %s for constructor argument.", string->buffer); +- hlsl_release_string_buffer(ctx, string); +- continue; +- } +- +- 
initialize_var_components(ctx, params->instrs, var, &idx, arg); +- } ++ initialize_var_components(ctx, params->instrs, var, &idx, params->args[i]); + + if (!(load = hlsl_new_var_load(ctx, var, loc))) + return NULL; +@@ -4318,26 +4532,34 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) + return false; + } +- else if (common_type->dimx == 1 && common_type->dimy == 1) +- { +- common_type = hlsl_get_numeric_type(ctx, cond_type->class, +- common_type->base_type, cond_type->dimx, cond_type->dimy); +- } +- else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) ++ else + { +- /* This condition looks wrong but is correct. +- * floatN is compatible with float1xN, but not with floatNx1. */ +- +- struct vkd3d_string_buffer *cond_string, *value_string; ++ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, ++ cond_type->dimx, cond_type->dimy); ++ if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) ++ return false; + +- cond_string = hlsl_type_to_string(ctx, cond_type); +- value_string = hlsl_type_to_string(ctx, common_type); +- if (cond_string && value_string) +- hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Ternary condition type '%s' is not compatible with value type '%s'.", +- cond_string->buffer, value_string->buffer); +- hlsl_release_string_buffer(ctx, cond_string); +- hlsl_release_string_buffer(ctx, value_string); ++ if (common_type->dimx == 1 && common_type->dimy == 1) ++ { ++ common_type = hlsl_get_numeric_type(ctx, cond_type->class, ++ common_type->base_type, cond_type->dimx, cond_type->dimy); ++ } ++ else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) ++ { ++ /* This condition looks wrong but is correct. ++ * floatN is compatible with float1xN, but not with floatNx1. 
*/ ++ ++ struct vkd3d_string_buffer *cond_string, *value_string; ++ ++ cond_string = hlsl_type_to_string(ctx, cond_type); ++ value_string = hlsl_type_to_string(ctx, common_type); ++ if (cond_string && value_string) ++ hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Ternary condition type '%s' is not compatible with value type '%s'.", ++ cond_string->buffer, value_string->buffer); ++ hlsl_release_string_buffer(ctx, cond_string); ++ hlsl_release_string_buffer(ctx, value_string); ++ } + } + + if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) +@@ -4362,9 +4584,16 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + hlsl_release_string_buffer(ctx, second_string); + } + ++ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, ++ cond_type->dimx, cond_type->dimy); ++ if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) ++ return false; ++ + common_type = first->data_type; + } + ++ assert(cond->data_type->base_type == HLSL_TYPE_BOOL); ++ + args[0] = cond; + args[1] = first; + args[2] = second; +@@ -4490,8 +4719,7 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc + } + + sampler_type = params->args[0]->data_type; +- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER +- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) ++ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; + +@@ -4555,8 +4783,7 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * + } + + sampler_type = params->args[0]->data_type; +- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER +- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) ++ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != 
HLSL_SAMPLER_DIM_COMPARISON) + { + struct vkd3d_string_buffer *string; + +@@ -4666,8 +4893,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc + } + + sampler_type = params->args[0]->data_type; +- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER +- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) ++ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; + +@@ -4903,8 +5129,7 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block * + } + + sampler_type = params->args[0]->data_type; +- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER +- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) ++ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; + +@@ -4966,8 +5191,7 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block + } + + sampler_type = params->args[0]->data_type; +- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER +- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) ++ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; + +@@ -5051,8 +5275,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru + const struct hlsl_type *object_type = object->data_type; + const struct method_function *method; + +- if (object_type->class != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE +- || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) ++ if (object_type->class != HLSL_CLASS_TEXTURE || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; + +@@ -5193,6 +5416,16 @@ static 
void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + hlsl_release_string_buffer(ctx, string); + } + ++static bool state_block_add_entry(struct hlsl_state_block *state_block, struct hlsl_state_block_entry *entry) ++{ ++ if (!vkd3d_array_reserve((void **)&state_block->entries, &state_block->capacity, state_block->count + 1, ++ sizeof(*state_block->entries))) ++ return false; ++ ++ state_block->entries[state_block->count++] = entry; ++ return true; ++} ++ + } + + %locations +@@ -5233,6 +5466,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + struct parse_attribute_list attr_list; + struct hlsl_ir_switch_case *switch_case; + struct hlsl_scope *scope; ++ struct hlsl_state_block *state_block; ++ struct state_block_index state_block_index; + } + + %token KW_BLENDSTATE +@@ -5243,6 +5478,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_CENTROID + %token KW_COLUMN_MAJOR + %token KW_COMPILE ++%token KW_COMPUTESHADER + %token KW_CONST + %token KW_CONTINUE + %token KW_DEFAULT +@@ -5250,14 +5486,17 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_DEPTHSTENCILVIEW + %token KW_DISCARD + %token KW_DO ++%token KW_DOMAINSHADER + %token KW_DOUBLE + %token KW_ELSE ++%token KW_EXPORT + %token KW_EXTERN + %token KW_FALSE + %token KW_FOR + %token KW_FXGROUP + %token KW_GEOMETRYSHADER + %token KW_GROUPSHARED ++%token KW_HULLSHADER + %token KW_IF + %token KW_IN + %token KW_INLINE +@@ -5271,7 +5510,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_PACKOFFSET + %token KW_PASS + %token KW_PIXELSHADER +-%token KW_PRECISE + %token KW_RASTERIZERORDEREDBUFFER + %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER + %token KW_RASTERIZERORDEREDTEXTURE1D +@@ -5429,6 +5667,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + + %type any_identifier + %type var_identifier ++%type 
stateblock_lhs_identifier + %type name_opt + + %type parameter +@@ -5443,6 +5682,10 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + + %type semantic + ++%type state_block ++ ++%type state_block_index_opt ++ + %type switch_case + + %type field_type +@@ -5453,6 +5696,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %type type_no_void + %type typedef_type + ++%type state_block_list + %type type_spec + %type variable_decl + %type variable_def +@@ -5483,9 +5727,9 @@ name_opt: + | any_identifier + + pass: +- KW_PASS name_opt annotations_opt '{' '}' ++ KW_PASS name_opt annotations_opt '{' state_block_start state_block '}' + { +- if (!add_pass(ctx, $2, $3, &@1)) ++ if (!add_pass(ctx, $2, $3, $6, &@1)) + YYABORT; + } + +@@ -5535,10 +5779,6 @@ technique10: + struct hlsl_scope *scope = ctx->cur_scope; + hlsl_pop_scope(ctx); + +- if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT && ctx->profile->major_version == 2) +- hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +- "The 'technique10' keyword is invalid for this profile."); +- + if (!add_technique(ctx, $2, scope, $3, "technique10", &@1)) + YYABORT; + } +@@ -5580,12 +5820,12 @@ effect_group: + } + + buffer_declaration: +- buffer_type any_identifier colon_attribute ++ var_modifiers buffer_type any_identifier colon_attribute annotations_opt + { +- if ($3.semantic.name) +- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); ++ if ($4.semantic.name) ++ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); + +- if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, &@2))) ++ if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $2, $3, $1, &$4.reg_reservation, $5, &@3))) + YYABORT; + } + +@@ -5884,9 +6124,9 @@ func_prototype_no_attrs: + /* Functions are unconditionally inlined. 
*/ + modifiers &= ~HLSL_MODIFIER_INLINE; + +- if (modifiers & ~HLSL_MODIFIERS_MAJORITY_MASK) ++ if (modifiers & ~(HLSL_MODIFIERS_MAJORITY_MASK | HLSL_MODIFIER_EXPORT)) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, +- "Only majority modifiers are allowed on functions."); ++ "Unexpected modifier used on a function."); + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + YYABORT; + if ((var = hlsl_get_var(ctx->globals, $3))) +@@ -6388,7 +6628,7 @@ type_no_void: + { + validate_texture_format_type(ctx, $3, &@3); + +- if (shader_profile_version_lt(ctx, 4, 1)) ++ if (hlsl_version_lt(ctx, 4, 1)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); +@@ -6427,7 +6667,7 @@ type_no_void: + $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); + if ($$->is_minimum_precision) + { +- if (shader_profile_version_lt(ctx, 4, 0)) ++ if (hlsl_version_lt(ctx, 4, 0)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Target profile doesn't support minimum-precision types."); +@@ -6454,6 +6694,14 @@ type_no_void: + { + $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); + } ++ | KW_VERTEXSHADER ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "VertexShader", true, true); ++ } ++ | KW_PIXELSHADER ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); ++ } + + type: + type_no_void +@@ -6583,22 +6831,91 @@ variable_decl: + $$->reg_reservation = $3.reg_reservation; + } + +-state: +- any_identifier '=' expr ';' ++state_block_start: ++ %empty + { +- vkd3d_free($1); +- destroy_block($3); ++ ctx->in_state_block = 1; + } + +-state_block_start: ++stateblock_lhs_identifier: ++ any_identifier ++ { ++ $$ = $1; ++ } ++ | KW_PIXELSHADER ++ { ++ if (!($$ = hlsl_strdup(ctx, "pixelshader"))) ++ YYABORT; ++ } ++ | KW_VERTEXSHADER ++ { ++ if (!($$ = hlsl_strdup(ctx, "vertexshader"))) ++ YYABORT; ++ } ++ 
++state_block_index_opt: + %empty + { +- ctx->in_state_block = 1; ++ $$.has_index = false; ++ $$.index = 0; + } ++ | '[' C_INTEGER ']' ++ { ++ if ($2 < 0) ++ { ++ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, ++ "State block array index is not a positive integer constant."); ++ YYABORT; ++ } ++ $$.has_index = true; ++ $$.index = $2; ++ } + + state_block: + %empty +- | state_block state ++ { ++ if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) ++ YYABORT; ++ } ++ | state_block stateblock_lhs_identifier state_block_index_opt '=' complex_initializer ';' ++ { ++ struct hlsl_state_block_entry *entry; ++ ++ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) ++ YYABORT; ++ ++ entry->name = $2; ++ entry->lhs_has_index = $3.has_index; ++ entry->lhs_index = $3.index; ++ ++ entry->instrs = $5.instrs; ++ entry->args = $5.args; ++ entry->args_count = $5.args_count; ++ ++ $$ = $1; ++ state_block_add_entry($$, entry); ++ } ++ ++state_block_list: ++ '{' state_block '}' ++ { ++ if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) ++ YYABORT; ++ ++ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, ++ $$->state_block_count + 1, sizeof(*$$->state_blocks)))) ++ YYABORT; ++ $$->state_blocks[$$->state_block_count++] = $2; ++ } ++ | state_block_list ',' '{' state_block '}' ++ { ++ $$ = $1; ++ ++ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, ++ $$->state_block_count + 1, sizeof(*$$->state_blocks)))) ++ YYABORT; ++ $$->state_blocks[$$->state_block_count++] = $4; ++ } + + variable_def: + variable_decl +@@ -6611,6 +6928,24 @@ variable_def: + { + $$ = $1; + ctx->in_state_block = 0; ++ ++ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, ++ $$->state_block_count + 1, sizeof(*$$->state_blocks)))) ++ YYABORT; ++ $$->state_blocks[$$->state_block_count++] = $4; ++ } ++ | variable_decl '{' state_block_start state_block_list '}' ++ { ++ $$ = $1; ++ ctx->in_state_block = 0; ++ ++ $$->state_blocks = $4->state_blocks; 
++ $$->state_block_count = $4->state_block_count; ++ $$->state_block_capacity = $4->state_block_capacity; ++ $4->state_blocks = NULL; ++ $4->state_block_count = 0; ++ $4->state_block_capacity = 0; ++ free_parse_variable_def($4); + } + + variable_def_typed: +@@ -6727,10 +7062,6 @@ var_modifiers: + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOPERSPECTIVE, &@1); + } +- | KW_PRECISE var_modifiers +- { +- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); +- } + | KW_SHARED var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); +@@ -6779,7 +7110,20 @@ var_modifiers: + { + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); + } +- ++ | KW_EXPORT var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_EXPORT, &@1); ++ } ++ | var_identifier var_modifiers ++ { ++ if (!strcmp($1, "precise")) ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); ++ else if (!strcmp($1, "single")) ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SINGLE, &@1); ++ else ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER, ++ "Unknown modifier %s.", debugstr_a($1)); ++ } + + complex_initializer: + initializer_expr +@@ -7227,15 +7571,13 @@ primary_expr: + { + if (ctx->in_state_block) + { +- struct hlsl_ir_load *load; +- struct hlsl_ir_var *var; ++ struct hlsl_ir_node *constant; + +- if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", +- hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) +- YYABORT; +- if (!(load = hlsl_new_var_load(ctx, var, &@1))) ++ if (!(constant = hlsl_new_stateblock_constant(ctx, $1, &@1))) + YYABORT; +- if (!($$ = make_block(ctx, &load->node))) ++ vkd3d_free($1); ++ ++ if (!($$ = make_block(ctx, constant))) + YYABORT; + } + else +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 307f86f55b7..f6cccfe8bea 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -427,7 +427,10 @@ static void 
prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * + { + field = &type->e.record.fields[i]; + if (hlsl_type_is_resource(field->type)) ++ { ++ hlsl_fixme(ctx, &field->loc, "Prepend uniform copies for resource components within structs."); + continue; ++ } + validate_field_semantic(ctx, field); + semantic = &field->semantic; + elem_semantic_index = semantic->index; +@@ -1562,7 +1565,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, + var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), + new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); + +- if (instr->data_type->class != HLSL_CLASS_OBJECT) ++ if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR) + { + struct hlsl_ir_node *swizzle_node; + +@@ -1622,6 +1625,9 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_UAV: + case HLSL_CLASS_OBJECT: + break; + +@@ -1631,6 +1637,10 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + /* FIXME: Actually we shouldn't even get here, but we don't split + * matrices yet. 
*/ + return false; ++ ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_VOID: ++ vkd3d_unreachable(); + } + + if (copy_propagation_replace_with_constant_vector(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) +@@ -1739,7 +1749,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s + { + unsigned int writemask = store->writemask; + +- if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) ++ if (!hlsl_is_numeric_type(store->rhs.node->data_type)) + writemask = VKD3DSP_WRITEMASK_0; + copy_propagation_set_value(ctx, var_def, start, writemask, store->rhs.node, store->node.index); + } +@@ -2603,8 +2613,8 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + + hlsl_copy_deref(ctx, &load->sampler, &load->resource); + load->resource.var = var; +- assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); +- assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); ++ assert(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE); ++ assert(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER); + + return true; + } +@@ -2647,10 +2657,11 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) + return false; + } + +-/* Append a FLOOR before a CAST to int or uint (which is written as a mere MOV). */ ++/* Turn CAST to int or uint into FLOOR + REINTERPRET (which is written as a mere MOV). 
*/ + static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { +- struct hlsl_ir_node *arg, *floor, *cast2; ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; ++ struct hlsl_ir_node *arg, *floor, *res; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) +@@ -2665,17 +2676,15 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + if (arg->data_type->base_type != HLSL_TYPE_FLOAT && arg->data_type->base_type != HLSL_TYPE_HALF) + return false; + +- /* Check that the argument is not already a FLOOR */ +- if (arg->type == HLSL_IR_EXPR && hlsl_ir_expr(arg)->op == HLSL_OP1_FLOOR) +- return false; +- + if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc))) + return false; + hlsl_block_add_instr(block, floor); + +- if (!(cast2 = hlsl_new_cast(ctx, floor, instr->data_type, &instr->loc))) ++ memset(operands, 0, sizeof(operands)); ++ operands[0] = floor; ++ if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) + return false; +- hlsl_block_add_instr(block, cast2); ++ hlsl_block_add_instr(block, res); + + return true; + } +@@ -2903,12 +2912,60 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct + return true; + } + +-/* Use 'movc' for the ternary operator. 
*/ ++static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; ++ struct hlsl_ir_node *arg, *arg_cast, *neg, *one, *sub, *res; ++ struct hlsl_constant_value one_value; ++ struct hlsl_type *float_type; ++ struct hlsl_ir_expr *expr; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ if (expr->op != HLSL_OP1_LOGIC_NOT) ++ return false; ++ ++ arg = expr->operands[0].node; ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); ++ ++ /* If this is happens, it means we failed to cast the argument to boolean somewhere. */ ++ assert(arg->data_type->base_type == HLSL_TYPE_BOOL); ++ ++ if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &arg->loc))) ++ return false; ++ hlsl_block_add_instr(block, arg_cast); ++ ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg_cast, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, neg); ++ ++ one_value.u[0].f = 1.0; ++ one_value.u[1].f = 1.0; ++ one_value.u[2].f = 1.0; ++ one_value.u[3].f = 1.0; ++ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, one); ++ ++ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg))) ++ return false; ++ hlsl_block_add_instr(block, sub); ++ ++ memset(operands, 0, sizeof(operands)); ++ operands[0] = sub; ++ if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, res); ++ ++ return true; ++} ++ ++/* Lower TERNARY to CMP for SM1. 
*/ + static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement; +- struct hlsl_ir_node *zero, *cond, *first, *second; +- struct hlsl_constant_value zero_value = { 0 }; ++ struct hlsl_ir_node *cond, *first, *second, *float_cond, *neg; + struct hlsl_ir_expr *expr; + struct hlsl_type *type; + +@@ -2929,55 +2986,282 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + return false; + } + +- if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ assert(cond->data_type->base_type == HLSL_TYPE_BOOL); ++ ++ type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, ++ instr->data_type->dimx, instr->data_type->dimy); ++ ++ if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, float_cond); ++ ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, float_cond, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, neg); ++ ++ memset(operands, 0, sizeof(operands)); ++ operands[0] = neg; ++ operands[1] = second; ++ operands[2] = first; ++ if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) ++ return false; ++ ++ hlsl_block_add_instr(block, replacement); ++ return true; ++} ++ ++static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, ++ struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *arg1, *arg1_cast, *arg2, *arg2_cast, *slt, *res, *ret; ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; ++ struct hlsl_type *float_type; ++ struct hlsl_ir_expr *expr; ++ bool negate = false; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ if (expr->op != HLSL_OP2_EQUAL && expr->op != HLSL_OP2_NEQUAL && expr->op != HLSL_OP2_LESS ++ && expr->op != HLSL_OP2_GEQUAL) ++ return false; ++ ++ arg1 = 
expr->operands[0].node; ++ arg2 = expr->operands[1].node; ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); ++ ++ if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, arg1_cast); ++ ++ if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, arg2_cast); ++ ++ switch (expr->op) ++ { ++ case HLSL_OP2_EQUAL: ++ case HLSL_OP2_NEQUAL: ++ { ++ struct hlsl_ir_node *neg, *sub, *abs, *abs_neg; ++ ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, neg); ++ ++ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) ++ return false; ++ hlsl_block_add_instr(block, sub); ++ ++ if (ctx->profile->major_version >= 3) ++ { ++ if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, sub, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, abs); ++ } ++ else ++ { ++ /* Use MUL as a precarious ABS. 
*/ ++ if (!(abs = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub))) ++ return false; ++ hlsl_block_add_instr(block, abs); ++ } ++ ++ if (!(abs_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, abs_neg); ++ ++ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, abs_neg, abs))) ++ return false; ++ hlsl_block_add_instr(block, slt); ++ ++ negate = (expr->op == HLSL_OP2_EQUAL); ++ break; ++ } ++ ++ case HLSL_OP2_GEQUAL: ++ case HLSL_OP2_LESS: ++ { ++ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg1_cast, arg2_cast))) ++ return false; ++ hlsl_block_add_instr(block, slt); ++ ++ negate = (expr->op == HLSL_OP2_GEQUAL); ++ break; ++ } ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ if (negate) + { +- struct hlsl_ir_node *abs, *neg; ++ struct hlsl_constant_value one_value; ++ struct hlsl_ir_node *one, *slt_neg; + +- if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc))) ++ one_value.u[0].f = 1.0; ++ one_value.u[1].f = 1.0; ++ one_value.u[2].f = 1.0; ++ one_value.u[3].f = 1.0; ++ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; +- hlsl_block_add_instr(block, abs); ++ hlsl_block_add_instr(block, one); + +- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) ++ if (!(slt_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) + return false; +- hlsl_block_add_instr(block, neg); ++ hlsl_block_add_instr(block, slt_neg); + +- operands[0] = neg; +- operands[1] = second; +- operands[2] = first; +- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) ++ if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, slt_neg))) + return false; +- } +- else if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) +- { +- hlsl_fixme(ctx, &instr->loc, "Ternary operator is not implemented for %s profile.", ctx->profile->name); +- return false; ++ 
hlsl_block_add_instr(block, res); + } + else + { +- if (cond->data_type->base_type == HLSL_TYPE_FLOAT) +- { +- if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, zero); ++ res = slt; ++ } + +- operands[0] = zero; +- operands[1] = cond; +- type = cond->data_type; +- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); +- if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, cond); +- } ++ /* We need a REINTERPRET so that the HLSL IR code is valid. SLT and its arguments must be FLOAT, ++ * and casts to BOOL have already been lowered to "!= 0". */ ++ memset(operands, 0, sizeof(operands)); ++ operands[0] = res; ++ if (!(ret = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, ret); ++ ++ return true; ++} ++ ++/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to ++ * CMP instructions (only available in pixel shaders). ++ * Based on the following equivalence: ++ * SLT(x, y) ++ * = (x < y) ? 1.0 : 0.0 ++ * = ((x - y) >= 0) ? 
0.0 : 1.0 ++ * = CMP(x - y, 0.0, 1.0) ++ */ ++static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp; ++ struct hlsl_constant_value zero_value, one_value; ++ struct hlsl_type *float_type; ++ struct hlsl_ir_expr *expr; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ if (expr->op != HLSL_OP2_SLT) ++ return false; ++ ++ arg1 = expr->operands[0].node; ++ arg2 = expr->operands[1].node; ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); ++ ++ if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, arg1_cast); + +- memset(operands, 0, sizeof(operands)); +- operands[0] = cond; +- operands[1] = first; +- operands[2] = second; +- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc))) ++ if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, arg2_cast); ++ ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, neg); ++ ++ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) ++ return false; ++ hlsl_block_add_instr(block, sub); ++ ++ memset(&zero_value, 0, sizeof(zero_value)); ++ if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, zero); ++ ++ one_value.u[0].f = 1.0; ++ one_value.u[1].f = 1.0; ++ one_value.u[2].f = 1.0; ++ one_value.u[3].f = 1.0; ++ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, one); ++ ++ if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one))) ++ return false; ++ hlsl_block_add_instr(block, cmp); ++ ++ return true; ++} ++ ++/* Intended to 
be used for SM1-SM3, lowers CMP instructions (only available in pixel shaders) to ++ * SLT instructions (only available in vertex shaders). ++ * Based on the following equivalence: ++ * CMP(x, y, z) ++ * = (x >= 0) ? y : z ++ * = z * ((x < 0) ? 1.0 : 0.0) + y * ((x < 0) ? 0.0 : 1.0) ++ * = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0)) ++ */ ++static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *args[3], *args_cast[3], *slt, *neg_slt, *sub, *zero, *one, *mul1, *mul2, *add; ++ struct hlsl_constant_value zero_value, one_value; ++ struct hlsl_type *float_type; ++ struct hlsl_ir_expr *expr; ++ unsigned int i; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ if (expr->op != HLSL_OP3_CMP) ++ return false; ++ ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); ++ ++ for (i = 0; i < 3; ++i) ++ { ++ args[i] = expr->operands[i].node; ++ ++ if (!(args_cast[i] = hlsl_new_cast(ctx, args[i], float_type, &instr->loc))) + return false; ++ hlsl_block_add_instr(block, args_cast[i]); + } + +- hlsl_block_add_instr(block, replacement); ++ memset(&zero_value, 0, sizeof(zero_value)); ++ if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, zero); ++ ++ one_value.u[0].f = 1.0; ++ one_value.u[1].f = 1.0; ++ one_value.u[2].f = 1.0; ++ one_value.u[3].f = 1.0; ++ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, one); ++ ++ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, args_cast[0], zero))) ++ return false; ++ hlsl_block_add_instr(block, slt); ++ ++ if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[2], slt))) ++ return false; ++ hlsl_block_add_instr(block, mul1); ++ ++ if (!(neg_slt = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, 
neg_slt); ++ ++ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg_slt))) ++ return false; ++ hlsl_block_add_instr(block, sub); ++ ++ if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[1], sub))) ++ return false; ++ hlsl_block_add_instr(block, mul2); ++ ++ if (!(add = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul1, mul2))) ++ return false; ++ hlsl_block_add_instr(block, add); ++ + return true; + } + +@@ -3018,11 +3302,21 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) + { ++ struct hlsl_type *cond_type = condition->data_type; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; + struct hlsl_ir_node *cond; + + assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); + ++ if (cond_type->base_type != HLSL_TYPE_BOOL) ++ { ++ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy); ++ ++ if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc))) ++ return NULL; ++ hlsl_block_add_instr(instrs, condition); ++ } ++ + operands[0] = condition; + operands[1] = if_true; + operands[2] = if_false; +@@ -3308,6 +3602,63 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + return true; + } + ++static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ struct hlsl_ir_expr *expr; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ if (expr->op == HLSL_OP1_CAST || instr->data_type->base_type == HLSL_TYPE_FLOAT) ++ return false; ++ ++ switch (expr->op) ++ { ++ case HLSL_OP1_ABS: ++ case HLSL_OP1_NEG: ++ case HLSL_OP2_ADD: ++ case HLSL_OP2_DIV: ++ case HLSL_OP2_LOGIC_AND: ++ case HLSL_OP2_LOGIC_OR: ++ case HLSL_OP2_MAX: ++ case 
HLSL_OP2_MIN: ++ case HLSL_OP2_MUL: ++ { ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_ir_node *arg, *arg_cast, *float_expr, *ret; ++ struct hlsl_type *float_type; ++ unsigned int i; ++ ++ for (i = 0; i < HLSL_MAX_OPERANDS; ++i) ++ { ++ arg = expr->operands[i].node; ++ if (!arg) ++ continue; ++ ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); ++ if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, arg_cast); ++ ++ operands[i] = arg_cast; ++ } ++ ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); ++ if (!(float_expr = hlsl_new_expr(ctx, expr->op, operands, float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, float_expr); ++ ++ if (!(ret = hlsl_new_cast(ctx, float_expr, instr->data_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, ret); ++ ++ return true; ++ } ++ default: ++ return false; ++ } ++} ++ + static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; +@@ -3402,6 +3753,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_SWITCH: + break; ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ /* Stateblock constants should not appear in the shader program. 
*/ ++ vkd3d_unreachable(); + } + + return false; +@@ -3457,9 +3811,6 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + { + unsigned int r; + +- if (!hlsl_type_is_resource(var->data_type)) +- continue; +- + if (var->reg_reservation.reg_type) + { + for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) +@@ -3493,6 +3844,22 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + } + } + ++static void deref_mark_last_read(struct hlsl_deref *deref, unsigned int last_read) ++{ ++ unsigned int i; ++ ++ if (hlsl_deref_is_lowered(deref)) ++ { ++ if (deref->rel_offset.node) ++ deref->rel_offset.node->last_read = last_read; ++ } ++ else ++ { ++ for (i = 0; i < deref->path_len; ++i) ++ deref->path[i].node->last_read = last_read; ++ } ++} ++ + /* Compute the earliest and latest liveness for each variable. In the case that + * a variable is accessed inside of a loop, we promote its liveness to extend + * to at least the range of the entire loop. We also do this for nodes, so that +@@ -3512,6 +3879,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + case HLSL_IR_CALL: + /* We should have inlined all calls before computing liveness. */ + vkd3d_unreachable(); ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ /* Stateblock constants should not appear in the shader program. */ ++ vkd3d_unreachable(); + + case HLSL_IR_STORE: + { +@@ -3521,8 +3891,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + if (!var->first_write) + var->first_write = loop_first ? 
min(instr->index, loop_first) : instr->index; + store->rhs.node->last_read = last_read; +- if (store->lhs.rel_offset.node) +- store->lhs.rel_offset.node->last_read = last_read; ++ deref_mark_last_read(&store->lhs, last_read); + break; + } + case HLSL_IR_EXPR: +@@ -3549,8 +3918,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + + var = load->src.var; + var->last_read = max(var->last_read, last_read); +- if (load->src.rel_offset.node) +- load->src.rel_offset.node->last_read = last_read; ++ deref_mark_last_read(&load->src, last_read); + break; + } + case HLSL_IR_LOOP: +@@ -3567,14 +3935,12 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + + var = load->resource.var; + var->last_read = max(var->last_read, last_read); +- if (load->resource.rel_offset.node) +- load->resource.rel_offset.node->last_read = last_read; ++ deref_mark_last_read(&load->resource, last_read); + + if ((var = load->sampler.var)) + { + var->last_read = max(var->last_read, last_read); +- if (load->sampler.rel_offset.node) +- load->sampler.rel_offset.node->last_read = last_read; ++ deref_mark_last_read(&load->sampler, last_read); + } + + if (load->coords.node) +@@ -3599,8 +3965,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + + var = store->resource.var; + var->last_read = max(var->last_read, last_read); +- if (store->resource.rel_offset.node) +- store->resource.rel_offset.node->last_read = last_read; ++ deref_mark_last_read(&store->resource, last_read); + store->coords.node->last_read = last_read; + store->value.node->last_read = last_read; + break; +@@ -4435,7 +4800,9 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) + continue; + + if (var1->reg_reservation.offset_type +- || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type)) ++ || var1->reg_reservation.reg_type == 's' ++ || var1->reg_reservation.reg_type == 't' ++ || var1->reg_reservation.reg_type 
== 'u') + buffer->manually_packed_elements = true; + else + buffer->automatically_packed_elements = true; +@@ -4885,25 +5252,6 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a + } + } + +-static bool type_has_object_components(struct hlsl_type *type) +-{ +- if (type->class == HLSL_CLASS_OBJECT) +- return true; +- if (type->class == HLSL_CLASS_ARRAY) +- return type_has_object_components(type->e.array.type); +- if (type->class == HLSL_CLASS_STRUCT) +- { +- unsigned int i; +- +- for (i = 0; i < type->e.record.field_count; ++i) +- { +- if (type_has_object_components(type->e.record.fields[i].type)) +- return true; +- } +- } +- return false; +-} +- + static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) + { + struct hlsl_ir_node *instr, *next; +@@ -5011,9 +5359,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + } + else + { +- if (type_has_object_components(var->data_type)) +- hlsl_fixme(ctx, &var->loc, "Prepend uniform copies for object components within structs."); +- + if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT + && !var->semantic.name) + { +@@ -5067,11 +5412,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); + + lower_ir(ctx, lower_narrowing_casts, body); +- lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_int_dot, body); + lower_ir(ctx, lower_int_division, body); + lower_ir(ctx, lower_int_modulus, body); + lower_ir(ctx, lower_int_abs, body); ++ lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_float_modulus, body); + hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); + do +@@ -5098,9 +5443,15 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); + sort_synthetic_separated_samplers_first(ctx); + +- 
lower_ir(ctx, lower_ternary, body); + if (profile->major_version < 4) + { ++ lower_ir(ctx, lower_ternary, body); ++ ++ lower_ir(ctx, lower_nonfloat_exprs, body); ++ /* Constants casted to float must be folded, and new casts to bool also need to be lowered. */ ++ hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); ++ lower_ir(ctx, lower_casts_to_bool, body); ++ + lower_ir(ctx, lower_casts_to_int, body); + lower_ir(ctx, lower_division, body); + lower_ir(ctx, lower_sqrt, body); +@@ -5108,6 +5459,12 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + lower_ir(ctx, lower_round, body); + lower_ir(ctx, lower_ceil, body); + lower_ir(ctx, lower_floor, body); ++ lower_ir(ctx, lower_comparison_operators, body); ++ lower_ir(ctx, lower_logic_not, body); ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ lower_ir(ctx, lower_slt, body); ++ else ++ lower_ir(ctx, lower_cmp, body); + } + + if (profile->major_version < 2) +@@ -5117,6 +5474,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + + lower_ir(ctx, validate_nonconstant_vector_store_derefs, body); + ++ do ++ compute_liveness(ctx, entry_func); ++ while (hlsl_transform_ir(ctx, dce, body, NULL)); ++ + /* TODO: move forward, remove when no longer needed */ + transform_derefs(ctx, replace_deref_path_with_offset, body); + while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +index b76b1fce507..4cea98e9286 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +@@ -1177,30 +1177,11 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + assert(dst_type->base_type == src2->node.data_type->base_type); + assert(dst_type->base_type == src3->node.data_type->base_type); ++ assert(src1->node.data_type->base_type == HLSL_TYPE_BOOL); + 
+ for (k = 0; k < dst_type->dimx; ++k) +- { +- switch (src1->node.data_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- dst->u[k] = src1->value.u[k].f != 0.0f ? src2->value.u[k] : src3->value.u[k]; +- break; +- +- case HLSL_TYPE_DOUBLE: +- dst->u[k] = src1->value.u[k].d != 0.0 ? src2->value.u[k] : src3->value.u[k]; +- break; ++ dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; + +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- case HLSL_TYPE_BOOL: +- dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; +- break; +- +- default: +- vkd3d_unreachable(); +- } +- } + return true; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index f0bd85338c6..eca18f4eb28 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -17,6 +17,7 @@ + */ + + #include "vkd3d_shader_private.h" ++#include "vkd3d_types.h" + + bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) + { +@@ -32,6 +33,9 @@ void vsir_program_cleanup(struct vsir_program *program) + vkd3d_free((void *)program->block_names[i]); + vkd3d_free(program->block_names); + shader_instruction_array_destroy(&program->instructions); ++ shader_signature_cleanup(&program->input_signature); ++ shader_signature_cleanup(&program->output_signature); ++ shader_signature_cleanup(&program->patch_constant_signature); + } + + static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) +@@ -53,19 +57,6 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i + vsir_instruction_init(ins, &location, VKD3DSIH_NOP); + } + +-static void remove_dcl_temps(struct vsir_program *program) +-{ +- unsigned int i; +- +- for (i = 0; i < program->instructions.count; ++i) +- { +- struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; +- +- if (ins->handler_idx == 
VKD3DSIH_DCL_TEMPS) +- vkd3d_shader_instruction_make_nop(ins); +- } +-} +- + static bool vsir_instruction_init_with_params(struct vsir_program *program, + struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, + enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) +@@ -91,86 +82,164 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, + return true; + } + +-static enum vkd3d_result instruction_array_lower_texkills(struct vkd3d_shader_parser *parser) ++static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program, ++ struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx) + { +- struct vsir_program *program = &parser->program; ++ const unsigned int components_read = 3 + (program->shader_version.major >= 2); + struct vkd3d_shader_instruction_array *instructions = &program->instructions; +- struct vkd3d_shader_instruction *texkill_ins, *ins; +- unsigned int components_read = 3 + (program->shader_version.major >= 2); +- unsigned int tmp_idx = ~0u; +- unsigned int i, k; +- +- for (i = 0; i < instructions->count; ++i) +- { +- texkill_ins = &instructions->elements[i]; ++ size_t pos = texkill - instructions->elements; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int j; + +- if (texkill_ins->handler_idx != VKD3DSIH_TEXKILL) +- continue; ++ if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- if (!shader_instruction_array_insert_at(instructions, i + 1, components_read + 1)) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ if (*tmp_idx == ~0u) ++ *tmp_idx = program->temp_count++; + +- if (tmp_idx == ~0u) +- tmp_idx = program->temp_count++; ++ /* tmp = ins->dst[0] < 0 */ + +- /* tmp = ins->dst[0] < 0 */ ++ ins = &instructions->elements[pos + 1]; ++ if (!vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_LTO, 1, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- ins = 
&instructions->elements[i + 1]; +- if (!vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_LTO, 1, 2)) ++ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->dst[0].reg.idx[0].offset = *tmp_idx; ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; ++ ++ ins->src[0].reg = texkill->dst[0].reg; ++ ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); ++ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[1].reg.u.immconst_f32[0] = 0.0f; ++ ins->src[1].reg.u.immconst_f32[1] = 0.0f; ++ ins->src[1].reg.u.immconst_f32[2] = 0.0f; ++ ins->src[1].reg.u.immconst_f32[3] = 0.0f; ++ ++ /* tmp.x = tmp.x || tmp.y */ ++ /* tmp.x = tmp.x || tmp.z */ ++ /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ ++ ++ for (j = 1; j < components_read; ++j) ++ { ++ ins = &instructions->elements[pos + 1 + j]; ++ if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_OR, 1, 2))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->dst[0].reg.idx[0].offset = tmp_idx; +- ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; ++ ins->dst[0].reg.idx[0].offset = *tmp_idx; ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; + +- ins->src[0].reg = texkill_ins->dst[0].reg; +- vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); ++ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[0].reg.idx[0].offset = *tmp_idx; ++ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->src[1].reg.u.immconst_f32[0] = 0.0f; +- ins->src[1].reg.u.immconst_f32[1] = 
0.0f; +- ins->src[1].reg.u.immconst_f32[2] = 0.0f; +- ins->src[1].reg.u.immconst_f32[3] = 0.0f; ++ ins->src[1].reg.idx[0].offset = *tmp_idx; ++ ins->src[1].swizzle = vkd3d_shader_create_swizzle(j, j, j, j); ++ } + +- /* tmp.x = tmp.x || tmp.y */ +- /* tmp.x = tmp.x || tmp.z */ +- /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ ++ /* discard_nz tmp.x */ + +- for (k = 1; k < components_read; ++k) +- { +- ins = &instructions->elements[i + 1 + k]; +- if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_OR, 1, 2))) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins = &instructions->elements[pos + 1 + components_read]; ++ if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_DISCARD, 0, 1))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + +- vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +- ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->dst[0].reg.idx[0].offset = tmp_idx; +- ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; +- +- vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +- ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->src[0].reg.idx[0].offset = tmp_idx; +- ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); +- vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +- ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->src[1].reg.idx[0].offset = tmp_idx; +- ins->src[1].swizzle = vkd3d_shader_create_swizzle(k, k, k, k); +- } ++ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[0].reg.idx[0].offset = *tmp_idx; ++ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + +- /* discard_nz tmp.x */ ++ /* Make the original instruction no-op */ ++ vkd3d_shader_instruction_make_nop(texkill); + +- ins = &instructions->elements[i + 1 + components_read]; +- if (!(vsir_instruction_init_with_params(program, 
ins, &texkill_ins->location, VKD3DSIH_DISCARD, 0, 1))) +- return VKD3D_ERROR_OUT_OF_MEMORY; +- ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; ++ return VKD3D_OK; ++} + +- vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +- ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; +- ins->src[0].reg.idx[0].offset = tmp_idx; +- ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++/* The Shader Model 5 Assembly documentation states: "If components of a mad ++ * instruction are tagged as precise, the hardware must execute a mad instruction ++ * or the exact equivalent, and it cannot split it into a multiply followed by an add." ++ * But DXIL.rst states the opposite: "Floating point multiply & add. This operation is ++ * not fused for "precise" operations." ++ * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. */ ++static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *program, ++ struct vkd3d_shader_instruction *mad, unsigned int *tmp_idx) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ struct vkd3d_shader_instruction *mul_ins, *add_ins; ++ size_t pos = mad - instructions->elements; ++ struct vkd3d_shader_dst_param *mul_dst; ++ ++ if (!(mad->flags & VKD3DSI_PRECISE_XYZW)) ++ return VKD3D_OK; ++ ++ if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ if (*tmp_idx == ~0u) ++ *tmp_idx = program->temp_count++; ++ ++ mul_ins = &instructions->elements[pos]; ++ add_ins = &instructions->elements[pos + 1]; ++ ++ mul_ins->handler_idx = VKD3DSIH_MUL; ++ mul_ins->src_count = 2; ++ ++ if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ add_ins->flags = mul_ins->flags & VKD3DSI_PRECISE_XYZW; ++ ++ mul_dst = mul_ins->dst; ++ *add_ins->dst = *mul_dst; ++ ++ mul_dst->modifiers = 0; ++ vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, 
mul_ins->src[0].reg.data_type, 1); ++ mul_dst->reg.dimension = add_ins->dst->reg.dimension; ++ mul_dst->reg.idx[0].offset = *tmp_idx; ++ ++ add_ins->src[0].reg = mul_dst->reg; ++ add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask); ++ add_ins->src[0].modifiers = 0; ++ add_ins->src[1] = mul_ins->src[2]; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ unsigned int tmp_idx = ~0u, i; ++ enum vkd3d_result ret; ++ ++ for (i = 0; i < instructions->count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &instructions->elements[i]; ++ ++ switch (ins->handler_idx) ++ { ++ case VKD3DSIH_TEXKILL: ++ if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0) ++ return ret; ++ break; ++ ++ case VKD3DSIH_MAD: ++ if ((ret = vsir_program_lower_precise_mad(program, ins, &tmp_idx)) < 0) ++ return ret; ++ break; ++ ++ case VKD3DSIH_DCL_CONSTANT_BUFFER: ++ case VKD3DSIH_DCL_TEMPS: ++ vkd3d_shader_instruction_make_nop(ins); ++ break; + +- /* Make the original instruction no-op */ +- vkd3d_shader_instruction_make_nop(texkill_ins); ++ default: ++ break; ++ } + } + + return VKD3D_OK; +@@ -227,10 +296,11 @@ static const struct vkd3d_shader_varying_map *find_varying_map( + return NULL; + } + +-static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_compile_info *compile_info) ++static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) + { +- struct shader_signature *signature = &parser->shader_desc.output_signature; ++ const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; ++ struct shader_signature *signature = &program->output_signature; + const struct 
vkd3d_shader_varying_map_info *varying_map; + unsigned int i; + +@@ -252,7 +322,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars + * location with a different mask. */ + if (input_mask && input_mask != e->mask) + { +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " + "Output mask %#x does not match input mask %#x.", + e->mask, input_mask); +@@ -269,7 +339,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars + { + if (varying_map->varying_map[i].output_signature_index >= signature->element_count) + { +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " + "The next stage consumes varyings not written by this stage."); + return VKD3D_ERROR_NOT_IMPLEMENTED; +@@ -453,7 +523,7 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader + + void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) + { +- vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UINT, 1); ++ vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); + param->reg.dimension = VSIR_DIMENSION_NONE; + param->reg.idx[0].offset = label_id; + } +@@ -464,12 +534,24 @@ static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned + src->reg.idx[0].offset = idx; + } + ++static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) ++{ ++ vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); ++ src->reg.idx[0].offset = idx; ++} ++ + static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) + { + vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); + 
dst->reg.idx[0].offset = idx; + } + ++static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) ++{ ++ vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); ++ dst->reg.idx[0].offset = idx; ++} ++ + static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) + { + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +@@ -1383,10 +1465,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + } + } + +-static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parser *parser) ++static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program) + { +- struct io_normaliser normaliser = {parser->program.instructions}; +- struct vsir_program *program = &parser->program; ++ struct io_normaliser normaliser = {program->instructions}; + struct vkd3d_shader_instruction *ins; + bool has_control_point_phase; + unsigned int i, j; +@@ -1394,9 +1475,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse + normaliser.phase = VKD3DSIH_INVALID; + normaliser.shader_type = program->shader_version.type; + normaliser.major = program->shader_version.major; +- normaliser.input_signature = &parser->shader_desc.input_signature; +- normaliser.output_signature = &parser->shader_desc.output_signature; +- normaliser.patch_constant_signature = &parser->shader_desc.patch_constant_signature; ++ normaliser.input_signature = &program->input_signature; ++ normaliser.output_signature = &program->output_signature; ++ normaliser.patch_constant_signature = &program->patch_constant_signature; + + for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i) + { +@@ -1439,9 +1520,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse + } + } + +- if (!shader_signature_merge(&parser->shader_desc.input_signature, normaliser.input_range_map, false) +- || 
!shader_signature_merge(&parser->shader_desc.output_signature, normaliser.output_range_map, false) +- || !shader_signature_merge(&parser->shader_desc.patch_constant_signature, normaliser.pc_range_map, true)) ++ if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) ++ || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) ++ || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) + { + program->instructions = normaliser.instructions; + return VKD3D_ERROR_OUT_OF_MEMORY; +@@ -1668,19 +1749,20 @@ static void remove_dead_code(struct vsir_program *program) + } + } + +-static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser *parser) ++static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) + { + unsigned int i; + +- for (i = 0; i < parser->program.instructions.count; ++i) ++ for (i = 0; i < program->instructions.count; ++i) + { +- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + struct vkd3d_shader_src_param *srcs; + + switch (ins->handler_idx) + { + case VKD3DSIH_TEX: +- if (!(srcs = shader_src_param_allocator_get(&parser->program.instructions.src_params, 3))) ++ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) + return VKD3D_ERROR_OUT_OF_MEMORY; + memset(srcs, 0, sizeof(*srcs) * 3); + +@@ -1723,7 +1805,7 @@ static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser + case VKD3DSIH_TEXREG2AR: + case VKD3DSIH_TEXREG2GB: + case VKD3DSIH_TEXREG2RGB: +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " + "Combined 
sampler instruction %#x.", ins->handler_idx); + return VKD3D_ERROR_NOT_IMPLEMENTED; +@@ -1789,10 +1871,10 @@ struct cf_flattener_info + + struct cf_flattener + { +- struct vkd3d_shader_parser *parser; ++ struct vsir_program *program; + + struct vkd3d_shader_location location; +- bool allocation_failed; ++ enum vkd3d_result status; + + struct vkd3d_shader_instruction *instructions; + size_t instruction_capacity; +@@ -1812,13 +1894,20 @@ struct cf_flattener + size_t control_flow_info_size; + }; + ++static void cf_flattener_set_error(struct cf_flattener *flattener, enum vkd3d_result error) ++{ ++ if (flattener->status != VKD3D_OK) ++ return; ++ flattener->status = error; ++} ++ + static struct vkd3d_shader_instruction *cf_flattener_require_space(struct cf_flattener *flattener, size_t count) + { + if (!vkd3d_array_reserve((void **)&flattener->instructions, &flattener->instruction_capacity, + flattener->instruction_count + count, sizeof(*flattener->instructions))) + { + ERR("Failed to allocate instructions.\n"); +- flattener->allocation_failed = true; ++ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); + return NULL; + } + return &flattener->instructions[flattener->instruction_count]; +@@ -1850,9 +1939,9 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ + { + struct vkd3d_shader_src_param *params; + +- if (!(params = vsir_program_get_src_params(&flattener->parser->program, count))) ++ if (!(params = vsir_program_get_src_params(flattener->program, count))) + { +- flattener->allocation_failed = true; ++ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); + return NULL; + } + ins->src = params; +@@ -1866,10 +1955,10 @@ static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int + + if (!(ins = cf_flattener_require_space(flattener, 1))) + return; +- if (vsir_instruction_init_label(ins, &flattener->location, label_id, &flattener->parser->program)) ++ if (vsir_instruction_init_label(ins, 
&flattener->location, label_id, flattener->program)) + ++flattener->instruction_count; + else +- flattener->allocation_failed = true; ++ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); + } + + /* For conditional branches, this returns the false target branch parameter. */ +@@ -1947,7 +2036,7 @@ static struct cf_flattener_info *cf_flattener_push_control_flow_level(struct cf_ + flattener->control_flow_depth + 1, sizeof(*flattener->control_flow_info))) + { + ERR("Failed to allocate control flow info structure.\n"); +- flattener->allocation_failed = true; ++ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); + return NULL; + } + +@@ -2014,12 +2103,12 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla + flattener->block_names[block_id] = buffer.buffer; + } + +-static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener) ++static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, ++ struct vkd3d_shader_message_context *message_context) + { + bool main_block_open, is_hull_shader, after_declarations_section; +- struct vkd3d_shader_parser *parser = flattener->parser; + struct vkd3d_shader_instruction_array *instructions; +- struct vsir_program *program = &parser->program; ++ struct vsir_program *program = flattener->program; + struct vkd3d_shader_instruction *dst_ins; + size_t i; + +@@ -2041,12 +2130,19 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + flattener->location = instruction->location; + + /* Declarations should occur before the first code block, which in hull shaders is marked by the first +- * phase instruction, and in all other shader types begins with the first label instruction. 
*/ +- if (!after_declarations_section && !vsir_instruction_is_dcl(instruction) +- && instruction->handler_idx != VKD3DSIH_NOP) ++ * phase instruction, and in all other shader types begins with the first label instruction. ++ * Declaring an indexable temp with function scope is not considered a declaration, ++ * because it needs to live inside a function. */ ++ if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) + { +- after_declarations_section = true; +- cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); ++ bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP ++ && instruction->declaration.indexable_temp.has_function_scope; ++ ++ if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) ++ { ++ after_declarations_section = true; ++ cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); ++ } + } + + cf_info = flattener->control_flow_depth +@@ -2064,7 +2160,8 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + break; + + case VKD3DSIH_LABEL: +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ vkd3d_shader_error(message_context, &instruction->location, ++ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: Label instruction."); + return VKD3D_ERROR_NOT_IMPLEMENTED; + +@@ -2229,8 +2326,10 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) + { + WARN("Unexpected src swizzle %#x.\n", src->swizzle); +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, ++ vkd3d_shader_error(message_context, &instruction->location, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, + "The swizzle for a switch case value is not scalar X."); ++ cf_flattener_set_error(flattener, VKD3D_ERROR_INVALID_SHADER); + } + value = *src->reg.u.immconst_u32; + +@@ -2358,21 +2457,18 @@ 
static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + ++flattener->instruction_count; + } + +- return flattener->allocation_failed ? VKD3D_ERROR_OUT_OF_MEMORY : VKD3D_OK; ++ return flattener->status; + } + +-static enum vkd3d_result flatten_control_flow_constructs(struct vkd3d_shader_parser *parser) ++static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) + { +- struct vsir_program *program = &parser->program; +- struct cf_flattener flattener = {0}; ++ struct cf_flattener flattener = {.program = program}; + enum vkd3d_result result; + +- flattener.parser = parser; +- result = cf_flattener_iterate_instruction_array(&flattener); +- +- if (result >= 0) ++ if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) + { +- vkd3d_free(parser->program.instructions.elements); ++ vkd3d_free(program->instructions.elements); + program->instructions.elements = flattener.instructions; + program->instructions.capacity = flattener.instruction_capacity; + program->instructions.count = flattener.instruction_count; +@@ -2548,97 +2644,6 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) + } + } + +- /* Second subpass: creating new blocks might have broken +- * references in PHI instructions, so we use the block map to fix +- * them. */ +- current_label = 0; +- for (i = 0; i < ins_count; ++i) +- { +- struct vkd3d_shader_instruction *ins = &instructions[i]; +- struct vkd3d_shader_src_param *new_src; +- unsigned int j, l, new_src_count = 0; +- +- switch (ins->handler_idx) +- { +- case VKD3DSIH_LABEL: +- current_label = label_from_src_param(&ins->src[0]); +- continue; +- +- case VKD3DSIH_PHI: +- break; +- +- default: +- continue; +- } +- +- /* First count how many source parameters we need. 
*/ +- for (j = 0; j < ins->src_count; j += 2) +- { +- unsigned int source_label = label_from_src_param(&ins->src[j + 1]); +- size_t k, match_count = 0; +- +- for (k = 0; k < map_count; ++k) +- { +- struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; +- +- if (mapping->switch_label == source_label && mapping->target_label == current_label) +- match_count += 1; +- } +- +- new_src_count += (match_count != 0) ? 2 * match_count : 2; +- } +- +- assert(new_src_count >= ins->src_count); +- +- /* Allocate more source parameters if needed. */ +- if (new_src_count == ins->src_count) +- { +- new_src = ins->src; +- } +- else +- { +- if (!(new_src = vsir_program_get_src_params(program, new_src_count))) +- { +- ERR("Failed to allocate %u source parameters.\n", new_src_count); +- goto fail; +- } +- } +- +- /* Then do the copy. */ +- for (j = 0, l = 0; j < ins->src_count; j += 2) +- { +- unsigned int source_label = label_from_src_param(&ins->src[j + 1]); +- size_t k, match_count = 0; +- +- for (k = 0; k < map_count; ++k) +- { +- struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; +- +- if (mapping->switch_label == source_label && mapping->target_label == current_label) +- { +- match_count += 1; +- +- new_src[l] = ins->src[j]; +- new_src[l + 1] = ins->src[j + 1]; +- new_src[l + 1].reg.idx[0].offset = mapping->if_label; +- l += 2; +- } +- } +- +- if (match_count == 0) +- { +- new_src[l] = ins->src[j]; +- new_src[l + 1] = ins->src[j + 1]; +- l += 2; +- } +- } +- +- assert(l == new_src_count); +- +- ins->src_count = new_src_count; +- ins->src = new_src; +- } +- + vkd3d_free(program->instructions.elements); + vkd3d_free(block_map); + program->instructions.elements = instructions; +@@ -2656,145 +2661,139 @@ fail: + return VKD3D_ERROR_OUT_OF_MEMORY; + } + +-static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src); ++struct ssas_to_temps_alloc ++{ ++ unsigned int *table; ++ 
unsigned int next_temp_idx; ++}; + +-/* This is idempotent: it can be safely applied more than once on the +- * same register. */ +-static void materialize_ssas_to_temps_process_reg(struct vkd3d_shader_parser *parser, struct vkd3d_shader_register *reg) ++static bool ssas_to_temps_alloc_init(struct ssas_to_temps_alloc *alloc, unsigned int ssa_count, unsigned int temp_count) + { +- unsigned int i; ++ size_t i = ssa_count * sizeof(*alloc->table); + +- if (reg->type == VKD3DSPR_SSA) ++ if (!(alloc->table = vkd3d_malloc(i))) + { +- reg->type = VKD3DSPR_TEMP; +- reg->idx[0].offset += parser->program.temp_count; ++ ERR("Failed to allocate SSA table.\n"); ++ return false; + } ++ memset(alloc->table, 0xff, i); + +- for (i = 0; i < reg->idx_count; ++i) +- if (reg->idx[i].rel_addr) +- materialize_ssas_to_temps_process_src_param(parser, reg->idx[i].rel_addr); +-} +- +-static void materialize_ssas_to_temps_process_dst_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_dst_param *dst) +-{ +- materialize_ssas_to_temps_process_reg(parser, &dst->reg); +-} +- +-static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src) +-{ +- materialize_ssas_to_temps_process_reg(parser, &src->reg); ++ alloc->next_temp_idx = temp_count; ++ return true; + } + +-static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins, +- unsigned int label) ++/* This is idempotent: it can be safely applied more than once on the ++ * same register. 
*/ ++static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc, ++ struct vkd3d_shader_register *reg) + { + unsigned int i; + +- assert(ins->handler_idx == VKD3DSIH_PHI); +- +- for (i = 0; i < ins->src_count; i += 2) ++ if (reg->type == VKD3DSPR_SSA && alloc->table[reg->idx[0].offset] != UINT_MAX) + { +- if (label_from_src_param(&ins->src[i + 1]) == label) +- return &ins->src[i]; ++ reg->type = VKD3DSPR_TEMP; ++ reg->idx[0].offset = alloc->table[reg->idx[0].offset]; + } + +- vkd3d_unreachable(); ++ for (i = 0; i < reg->idx_count; ++i) ++ if (reg->idx[i].rel_addr) ++ materialize_ssas_to_temps_process_reg(program, alloc, &reg->idx[i].rel_addr->reg); + } + +-static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser *parser, +- struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc, +- const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond, +- const struct vkd3d_shader_src_param *source, bool invert) ++struct ssas_to_temps_block_info + { +- struct vkd3d_shader_src_param *src; +- struct vkd3d_shader_dst_param *dst; +- +- if (!vsir_instruction_init_with_params(&parser->program, instruction, loc, +- cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ?
3 : 1)) +- return false; +- +- dst = instruction->dst; +- src = instruction->src; +- +- dst[0] = *dest; +- materialize_ssas_to_temps_process_dst_param(parser, &dst[0]); ++ struct phi_incoming_to_temp ++ { ++ struct vkd3d_shader_src_param *src; ++ struct vkd3d_shader_dst_param *dst; ++ } *incomings; ++ size_t incoming_capacity; ++ size_t incoming_count; ++}; + +- assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0); +- assert(dst[0].modifiers == 0); +- assert(dst[0].shift == 0); ++static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *block_info, ++ size_t count) ++{ ++ size_t i; + +- if (cond) +- { +- src[0] = *cond; +- src[1 + invert] = *source; +- memset(&src[2 - invert], 0, sizeof(src[2 - invert])); +- src[2 - invert].reg = dst[0].reg; +- materialize_ssas_to_temps_process_src_param(parser, &src[1]); +- materialize_ssas_to_temps_process_src_param(parser, &src[2]); +- } +- else +- { +- src[0] = *source; +- materialize_ssas_to_temps_process_src_param(parser, &src[0]); +- } ++ for (i = 0; i < count; ++i) ++ vkd3d_free(block_info[i].incomings); + +- return true; ++ vkd3d_free(block_info); + } + +-static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *parser) ++static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program) + { ++ size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; ++ struct ssas_to_temps_block_info *info, *block_info = NULL; + struct vkd3d_shader_instruction *instructions = NULL; +- struct materialize_ssas_to_temps_block_data +- { +- size_t phi_begin; +- size_t phi_count; +- } *block_index = NULL; +- size_t ins_capacity = 0, ins_count = 0, i; ++ struct ssas_to_temps_alloc alloc = {0}; + unsigned int current_label = 0; + +- if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) +- goto fail; +- +- if (!(block_index = vkd3d_calloc(parser->program.block_count, sizeof(*block_index)))) ++ if (!(block_info = 
vkd3d_calloc(program->block_count, sizeof(*block_info)))) + { +- ERR("Failed to allocate block index.\n"); ++ ERR("Failed to allocate block info array.\n"); + goto fail; + } + +- for (i = 0; i < parser->program.instructions.count; ++i) ++ if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) ++ goto fail; ++ ++ for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i) + { +- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ unsigned int j, temp_idx; + +- switch (ins->handler_idx) ++ /* Only phi src/dst SSA values need be converted here. Structurisation may ++ * introduce new cases of undominated SSA use, which will be handled later. */ ++ if (ins->handler_idx != VKD3DSIH_PHI) ++ continue; ++ ++phi_count; ++ ++ temp_idx = alloc.next_temp_idx++; ++ ++ for (j = 0; j < ins->src_count; j += 2) + { +- case VKD3DSIH_LABEL: +- current_label = label_from_src_param(&ins->src[0]); +- break; ++ struct phi_incoming_to_temp *incoming; ++ unsigned int label; + +- case VKD3DSIH_PHI: +- assert(current_label != 0); +- assert(i != 0); +- if (block_index[current_label - 1].phi_begin == 0) +- block_index[current_label - 1].phi_begin = i; +- block_index[current_label - 1].phi_count += 1; +- break; ++ label = label_from_src_param(&ins->src[j + 1]); ++ assert(label); + +- default: +- current_label = 0; +- break; ++ info = &block_info[label - 1]; ++ ++ if (!(vkd3d_array_reserve((void **)&info->incomings, &info->incoming_capacity, info->incoming_count + 1, ++ sizeof(*info->incomings)))) ++ goto fail; ++ ++ incoming = &info->incomings[info->incoming_count++]; ++ incoming->src = &ins->src[j]; ++ incoming->dst = ins->dst; ++ ++ alloc.table[ins->dst->reg.idx[0].offset] = temp_idx; ++ ++ ++incoming_count; + } ++ ++ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg); + } + +- for (i = 0; i < 
parser->program.instructions.count; ++i) ++ if (!phi_count) ++ goto done; ++ ++ if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count)) ++ goto fail; ++ ++ for (i = 0; i < program->instructions.count; ++i) + { +- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; ++ struct vkd3d_shader_instruction *mov_ins, *ins = &program->instructions.elements[i]; + size_t j; + + for (j = 0; j < ins->dst_count; ++j) +- materialize_ssas_to_temps_process_dst_param(parser, &ins->dst[j]); ++ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); + + for (j = 0; j < ins->src_count; ++j) +- materialize_ssas_to_temps_process_src_param(parser, &ins->src[j]); ++ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); + + switch (ins->handler_idx) + { +@@ -2803,62 +2802,21 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p + break; + + case VKD3DSIH_BRANCH: +- { +- if (vsir_register_is_label(&ins->src[0].reg)) +- { +- const struct materialize_ssas_to_temps_block_data *data = &block_index[label_from_src_param(&ins->src[0]) - 1]; +- +- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + data->phi_count)) +- goto fail; +- +- for (j = data->phi_begin; j < data->phi_begin + data->phi_count; ++j) +- { +- const struct vkd3d_shader_src_param *source; +- +- source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); +- if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, +- &parser->program.instructions.elements[j].dst[0], NULL, source, false)) +- goto fail; ++ case VKD3DSIH_SWITCH_MONOLITHIC: ++ info = &block_info[current_label - 1]; + +- ++ins_count; +- } +- } +- else ++ for (j = 0; j < info->incoming_count; ++j) + { +- struct materialize_ssas_to_temps_block_data *data_true = &block_index[label_from_src_param(&ins->src[1]) - 1], +- 
*data_false = &block_index[label_from_src_param(&ins->src[2]) - 1]; +- const struct vkd3d_shader_src_param *cond = &ins->src[0]; ++ struct phi_incoming_to_temp *incoming = &info->incomings[j]; + +- if (!reserve_instructions(&instructions, &ins_capacity, +- ins_count + data_true->phi_count + data_false->phi_count)) ++ mov_ins = &instructions[ins_count++]; ++ if (!vsir_instruction_init_with_params(program, mov_ins, &ins->location, VKD3DSIH_MOV, 1, 0)) + goto fail; +- +- for (j = data_true->phi_begin; j < data_true->phi_begin + data_true->phi_count; ++j) +- { +- const struct vkd3d_shader_src_param *source; +- +- source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); +- if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, +- &parser->program.instructions.elements[j].dst[0], cond, source, false)) +- goto fail; +- +- ++ins_count; +- } +- +- for (j = data_false->phi_begin; j < data_false->phi_begin + data_false->phi_count; ++j) +- { +- const struct vkd3d_shader_src_param *source; +- +- source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); +- if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, +- &parser->program.instructions.elements[j].dst[0], cond, source, true)) +- goto fail; +- +- ++ins_count; +- } ++ *mov_ins->dst = *incoming->dst; ++ mov_ins->src = incoming->src; ++ mov_ins->src_count = 1; + } + break; +- } + + case VKD3DSIH_PHI: + continue; +@@ -2867,162 +2825,55 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p + break; + } + +- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) +- goto fail; +- + instructions[ins_count++] = *ins; + } + +- vkd3d_free(parser->program.instructions.elements); +- vkd3d_free(block_index); +- parser->program.instructions.elements = instructions; +- parser->program.instructions.capacity = 
ins_capacity; +- parser->program.instructions.count = ins_count; +- parser->program.temp_count += parser->program.ssa_count; +- parser->program.ssa_count = 0; ++ vkd3d_free(program->instructions.elements); ++ program->instructions.elements = instructions; ++ program->instructions.capacity = ins_capacity; ++ program->instructions.count = ins_count; ++ program->temp_count = alloc.next_temp_idx; ++done: ++ ssas_to_temps_block_info_cleanup(block_info, program->block_count); ++ vkd3d_free(alloc.table); + + return VKD3D_OK; + + fail: + vkd3d_free(instructions); +- vkd3d_free(block_index); ++ ssas_to_temps_block_info_cleanup(block_info, program->block_count); ++ vkd3d_free(alloc.table); + + return VKD3D_ERROR_OUT_OF_MEMORY; + } + +-static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *parser) ++struct vsir_block_list + { +- const unsigned int block_temp_idx = parser->program.temp_count; +- struct vkd3d_shader_instruction *instructions = NULL; +- const struct vkd3d_shader_location no_loc = {0}; +- size_t ins_capacity = 0, ins_count = 0, i; +- bool first_label_found = false; ++ struct vsir_block **blocks; ++ size_t count, capacity; ++}; + +- if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) +- goto fail; ++static void vsir_block_list_init(struct vsir_block_list *list) ++{ ++ memset(list, 0, sizeof(*list)); ++} ++ ++static void vsir_block_list_cleanup(struct vsir_block_list *list) ++{ ++ vkd3d_free(list->blocks); ++} + +- for (i = 0; i < parser->program.instructions.count; ++i) ++static enum vkd3d_result vsir_block_list_add_checked(struct vsir_block_list *list, struct vsir_block *block) ++{ ++ if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) + { +- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; ++ ERR("Cannot extend block list.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } + +- switch (ins->handler_idx) +- { +- case 
VKD3DSIH_PHI: +- case VKD3DSIH_SWITCH_MONOLITHIC: +- vkd3d_unreachable(); +- +- case VKD3DSIH_LABEL: +- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 4)) +- goto fail; +- +- if (!first_label_found) +- { +- first_label_found = true; +- +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) +- goto fail; +- dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); +- src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); +- ins_count++; +- +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_LOOP, 0, 0)) +- goto fail; +- ins_count++; +- +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_SWITCH, 0, 1)) +- goto fail; +- src_param_init_temp_uint(&instructions[ins_count].src[0], block_temp_idx); +- ins_count++; +- } +- +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_CASE, 0, 1)) +- goto fail; +- src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); +- ins_count++; +- break; +- +- case VKD3DSIH_BRANCH: +- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 2)) +- goto fail; +- +- if (vsir_register_is_label(&ins->src[0].reg)) +- { +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) +- goto fail; +- dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); +- src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); +- ins_count++; +- } +- else +- { +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOVC, 1, 3)) +- goto fail; +- dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); +- instructions[ins_count].src[0] = ins->src[0]; +- 
src_param_init_const_uint(&instructions[ins_count].src[1], label_from_src_param(&ins->src[1])); +- src_param_init_const_uint(&instructions[ins_count].src[2], label_from_src_param(&ins->src[2])); +- ins_count++; +- } +- +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_BREAK, 0, 0)) +- goto fail; +- ins_count++; +- break; +- +- case VKD3DSIH_RET: +- default: +- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) +- goto fail; +- +- instructions[ins_count++] = *ins; +- break; +- } +- } +- +- assert(first_label_found); +- +- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3)) +- goto fail; +- +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDSWITCH, 0, 0)) +- goto fail; +- ins_count++; +- +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDLOOP, 0, 0)) +- goto fail; +- ins_count++; +- +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_RET, 0, 0)) +- goto fail; +- ins_count++; +- +- vkd3d_free(parser->program.instructions.elements); +- parser->program.instructions.elements = instructions; +- parser->program.instructions.capacity = ins_capacity; +- parser->program.instructions.count = ins_count; +- parser->program.temp_count += 1; ++ list->blocks[list->count++] = block; + + return VKD3D_OK; +- +-fail: +- vkd3d_free(instructions); +- return VKD3D_ERROR_OUT_OF_MEMORY; +-} +- +-struct vsir_block_list +-{ +- struct vsir_block **blocks; +- size_t count, capacity; +-}; +- +-static void vsir_block_list_init(struct vsir_block_list *list) +-{ +- memset(list, 0, sizeof(*list)); +-} +- +-static void vsir_block_list_cleanup(struct vsir_block_list *list) +-{ +- vkd3d_free(list->blocks); + } + + static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +@@ -3031,22 +2882,21 @@ static 
enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc + + for (i = 0; i < list->count; ++i) + if (block == list->blocks[i]) +- return VKD3D_OK; +- +- if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) +- { +- ERR("Cannot extend block list.\n"); +- return VKD3D_ERROR_OUT_OF_MEMORY; +- } ++ return VKD3D_FALSE; + +- list->blocks[list->count++] = block; ++ return vsir_block_list_add_checked(list, block); ++} + +- return VKD3D_OK; ++/* It is guaranteed that the relative order is kept. */ ++static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t idx) ++{ ++ --list->count; ++ memmove(&list->blocks[idx], &list->blocks[idx + 1], (list->count - idx) * sizeof(*list->blocks)); + } + + struct vsir_block + { +- unsigned int label; ++ unsigned int label, order_pos; + /* `begin' points to the instruction immediately following the + * LABEL that introduces the block. `end' points to the terminator + * instruction (either BRANCH or RET). 
They can coincide, meaning +@@ -3089,12 +2939,209 @@ static void vsir_block_cleanup(struct vsir_block *block) + vkd3d_free(block->dominates); + } + ++static int block_compare(const void *ptr1, const void *ptr2) ++{ ++ const struct vsir_block *block1 = *(const struct vsir_block **)ptr1; ++ const struct vsir_block *block2 = *(const struct vsir_block **)ptr2; ++ ++ return vkd3d_u32_compare(block1->label, block2->label); ++} ++ ++static void vsir_block_list_sort(struct vsir_block_list *list) ++{ ++ qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare); ++} ++ ++static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_block *block) ++{ ++ return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare); ++} ++ ++struct vsir_cfg_structure_list ++{ ++ struct vsir_cfg_structure *structures; ++ size_t count, capacity; ++ unsigned int end; ++}; ++ ++struct vsir_cfg_structure ++{ ++ enum vsir_cfg_structure_type ++ { ++ /* Execute a block of the original VSIR program. */ ++ STRUCTURE_TYPE_BLOCK, ++ /* Execute a loop, which is identified by an index. */ ++ STRUCTURE_TYPE_LOOP, ++ /* Execute a selection construct. */ ++ STRUCTURE_TYPE_SELECTION, ++ /* Execute a `return' or a (possibly) multilevel `break' or ++ * `continue', targeting a loop by its index. If `condition' ++ * is non-NULL, then the jump is conditional (this is ++ * currently not allowed for `return'). 
*/ ++ STRUCTURE_TYPE_JUMP, ++ } type; ++ union ++ { ++ struct vsir_block *block; ++ struct vsir_cfg_structure_loop ++ { ++ struct vsir_cfg_structure_list body; ++ unsigned idx; ++ bool needs_trampoline; ++ struct vsir_cfg_structure *outer_loop; ++ } loop; ++ struct vsir_cfg_structure_selection ++ { ++ struct vkd3d_shader_src_param *condition; ++ struct vsir_cfg_structure_list if_body; ++ struct vsir_cfg_structure_list else_body; ++ bool invert_condition; ++ } selection; ++ struct vsir_cfg_structure_jump ++ { ++ enum vsir_cfg_jump_type ++ { ++ /* NONE is available as an intermediate value, but it ++ * is not allowed in valid structured programs. */ ++ JUMP_NONE, ++ JUMP_BREAK, ++ JUMP_CONTINUE, ++ JUMP_RET, ++ } type; ++ unsigned int target; ++ struct vkd3d_shader_src_param *condition; ++ bool invert_condition; ++ bool needs_launcher; ++ } jump; ++ } u; ++}; ++ ++static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type); ++static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure); ++ ++static void vsir_cfg_structure_list_cleanup(struct vsir_cfg_structure_list *list) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < list->count; ++i) ++ vsir_cfg_structure_cleanup(&list->structures[i]); ++ vkd3d_free(list->structures); ++} ++ ++static struct vsir_cfg_structure *vsir_cfg_structure_list_append(struct vsir_cfg_structure_list *list, ++ enum vsir_cfg_structure_type type) ++{ ++ struct vsir_cfg_structure *ret; ++ ++ if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + 1, ++ sizeof(*list->structures))) ++ return NULL; ++ ++ ret = &list->structures[list->count++]; ++ ++ vsir_cfg_structure_init(ret, type); ++ ++ return ret; ++} ++ ++static enum vkd3d_result vsir_cfg_structure_list_append_from_region(struct vsir_cfg_structure_list *list, ++ struct vsir_cfg_structure *begin, size_t size) ++{ ++ if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + size, 
++ sizeof(*list->structures))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ memcpy(&list->structures[list->count], begin, size * sizeof(*begin)); ++ ++ list->count += size; ++ ++ return VKD3D_OK; ++} ++ ++static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type) ++{ ++ memset(structure, 0, sizeof(*structure)); ++ structure->type = type; ++} ++ ++static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) ++{ ++ switch (structure->type) ++ { ++ case STRUCTURE_TYPE_LOOP: ++ vsir_cfg_structure_list_cleanup(&structure->u.loop.body); ++ break; ++ ++ case STRUCTURE_TYPE_SELECTION: ++ vsir_cfg_structure_list_cleanup(&structure->u.selection.if_body); ++ vsir_cfg_structure_list_cleanup(&structure->u.selection.else_body); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++struct vsir_cfg_emit_target ++{ ++ struct vkd3d_shader_instruction *instructions; ++ size_t ins_capacity, ins_count; ++ unsigned int jump_target_temp_idx; ++ unsigned int temp_count; ++}; ++ + struct vsir_cfg + { ++ struct vkd3d_shader_message_context *message_context; + struct vsir_program *program; + struct vsir_block *blocks; + struct vsir_block *entry; + size_t block_count; ++ struct vkd3d_string_buffer debug_buffer; ++ ++ struct vsir_block_list *loops; ++ size_t loops_count, loops_capacity; ++ size_t *loops_by_header; ++ ++ struct vsir_block_list order; ++ struct cfg_loop_interval ++ { ++ /* `begin' is the position of the first block of the loop in ++ * the topological sort; `end' is the position of the first ++ * block after the loop. In other words, `begin' is where a ++ * `continue' instruction would jump and `end' is where a ++ * `break' instruction would jump. */ ++ unsigned int begin, end; ++ /* Each loop interval can be natural or synthetic. Natural ++ * intervals are added to represent loops given by CFG back ++ * edges. 
Synthetic intervals do not correspond to loops in ++ * the input CFG, but are added to leverage their `break' ++ * instruction in order to execute forward edges. ++ * ++ * For a synthetic loop interval it's not really important ++ * which one is the `begin' block, since we don't need to ++ * execute `continue' for them. So we have some leeway for ++ * moving it provided that these conditions are met: 1. the ++ * interval must contain all `break' instructions that target ++ * it, which in practice means that `begin' can be moved ++ * backward and not forward; 2. intervals must remain properly ++ * nested (for each pair of intervals, either one contains the ++ * other or they are disjoint). ++ * ++ * Subject to these conditions, we try to reuse the same loop ++ * as much as possible (if many forward edges target the same ++ * block), but we still try to keep `begin' as forward as ++ * possible, to keep the loop scope as small as possible. */ ++ bool synthetic; ++ /* The number of jump instructions (both conditional and ++ * unconditional) that target this loop. 
*/ ++ unsigned int target_count; ++ } *loop_intervals; ++ size_t loop_interval_count, loop_interval_capacity; ++ ++ struct vsir_cfg_structure_list structured_program; ++ ++ struct vsir_cfg_emit_target *target; + }; + + static void vsir_cfg_cleanup(struct vsir_cfg *cfg) +@@ -3104,7 +3151,44 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) + for (i = 0; i < cfg->block_count; ++i) + vsir_block_cleanup(&cfg->blocks[i]); + ++ for (i = 0; i < cfg->loops_count; ++i) ++ vsir_block_list_cleanup(&cfg->loops[i]); ++ ++ vsir_block_list_cleanup(&cfg->order); ++ ++ vsir_cfg_structure_list_cleanup(&cfg->structured_program); ++ + vkd3d_free(cfg->blocks); ++ vkd3d_free(cfg->loops); ++ vkd3d_free(cfg->loops_by_header); ++ vkd3d_free(cfg->loop_intervals); ++ ++ if (TRACE_ON()) ++ vkd3d_string_buffer_cleanup(&cfg->debug_buffer); ++} ++ ++static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, ++ unsigned int end, bool synthetic) ++{ ++ struct cfg_loop_interval *interval; ++ ++ if (!vkd3d_array_reserve((void **)&cfg->loop_intervals, &cfg->loop_interval_capacity, ++ cfg->loop_interval_count + 1, sizeof(*cfg->loop_intervals))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ interval = &cfg->loop_intervals[cfg->loop_interval_count++]; ++ ++ interval->begin = begin; ++ interval->end = end; ++ interval->synthetic = synthetic; ++ interval->target_count = 0; ++ ++ return VKD3D_OK; ++} ++ ++static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2) ++{ ++ return bitmap_is_set(b1->dominates, b2->label - 1); + } + + static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_block *block, +@@ -3145,268 +3229,1838 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) + shape = "trapezium"; + break; + +- case VKD3DSIH_BRANCH: +- shape = vsir_register_is_label(&block->end->src[0].reg) ? "ellipse" : "box"; ++ case VKD3DSIH_BRANCH: ++ shape = vsir_register_is_label(&block->end->src[0].reg) ? 
"ellipse" : "box"; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ TRACE(" n%u [label=\"%u\", shape=\"%s\"];\n", block->label, block->label, shape); ++ ++ for (j = 0; j < block->successors.count; ++j) ++ TRACE(" n%u -> n%u;\n", block->label, block->successors.blocks[j]->label); ++ } ++ ++ TRACE("}\n"); ++} ++ ++static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list); ++ ++static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure *structure) ++{ ++ switch (structure->type) ++ { ++ case STRUCTURE_TYPE_BLOCK: ++ TRACE("%sblock %u\n", cfg->debug_buffer.buffer, structure->u.block->label); ++ break; ++ ++ case STRUCTURE_TYPE_LOOP: ++ TRACE("%s%u : loop {\n", cfg->debug_buffer.buffer, structure->u.loop.idx); ++ ++ vsir_cfg_structure_list_dump(cfg, &structure->u.loop.body); ++ ++ TRACE("%s} # %u%s\n", cfg->debug_buffer.buffer, structure->u.loop.idx, ++ structure->u.loop.needs_trampoline ? ", tramp" : ""); ++ break; ++ ++ case STRUCTURE_TYPE_SELECTION: ++ TRACE("%sif {\n", cfg->debug_buffer.buffer); ++ ++ vsir_cfg_structure_list_dump(cfg, &structure->u.selection.if_body); ++ ++ if (structure->u.selection.else_body.count == 0) ++ { ++ TRACE("%s}\n", cfg->debug_buffer.buffer); ++ } ++ else ++ { ++ TRACE("%s} else {\n", cfg->debug_buffer.buffer); ++ ++ vsir_cfg_structure_list_dump(cfg, &structure->u.selection.else_body); ++ ++ TRACE("%s}\n", cfg->debug_buffer.buffer); ++ } ++ break; ++ ++ case STRUCTURE_TYPE_JUMP: ++ { ++ const char *type_str; ++ ++ switch (structure->u.jump.type) ++ { ++ case JUMP_RET: ++ TRACE("%sret\n", cfg->debug_buffer.buffer); ++ return; ++ ++ case JUMP_BREAK: ++ type_str = "break"; ++ break; ++ ++ case JUMP_CONTINUE: ++ type_str = "continue"; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ TRACE("%s%s%s %u%s\n", cfg->debug_buffer.buffer, type_str, ++ structure->u.jump.condition ? "c" : "", structure->u.jump.target, ++ structure->u.jump.needs_launcher ? 
" # launch" : ""); ++ break; ++ } ++ ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) ++{ ++ unsigned int i; ++ ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, " "); ++ ++ for (i = 0; i < list->count; ++i) ++ vsir_cfg_structure_dump(cfg, &list->structures[i]); ++ ++ vkd3d_string_buffer_truncate(&cfg->debug_buffer, cfg->debug_buffer.content_size - 2); ++} ++ ++static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < cfg->structured_program.count; ++i) ++ vsir_cfg_structure_dump(cfg, &cfg->structured_program.structures[i]); ++} ++ ++static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) ++{ ++ struct vsir_block *current_block = NULL; ++ enum vkd3d_result ret; ++ size_t i; ++ ++ memset(cfg, 0, sizeof(*cfg)); ++ cfg->message_context = message_context; ++ cfg->program = program; ++ cfg->block_count = program->block_count; ++ cfg->target = target; ++ ++ vsir_block_list_init(&cfg->order); ++ ++ if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ if (TRACE_ON()) ++ vkd3d_string_buffer_init(&cfg->debug_buffer); ++ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; ++ ++ switch (instruction->handler_idx) ++ { ++ case VKD3DSIH_PHI: ++ case VKD3DSIH_SWITCH_MONOLITHIC: ++ vkd3d_unreachable(); ++ ++ case VKD3DSIH_LABEL: ++ { ++ unsigned int label = label_from_src_param(&instruction->src[0]); ++ ++ assert(!current_block); ++ assert(label > 0); ++ assert(label <= cfg->block_count); ++ current_block = &cfg->blocks[label - 1]; ++ assert(current_block->label == 0); ++ if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0) 
++ goto fail; ++ current_block->begin = &program->instructions.elements[i + 1]; ++ if (!cfg->entry) ++ cfg->entry = current_block; ++ break; ++ } ++ ++ case VKD3DSIH_BRANCH: ++ case VKD3DSIH_RET: ++ assert(current_block); ++ current_block->end = instruction; ++ current_block = NULL; ++ break; ++ ++ default: ++ break; ++ } ++ } ++ ++ for (i = 0; i < cfg->block_count; ++i) ++ { ++ struct vsir_block *block = &cfg->blocks[i]; ++ ++ if (block->label == 0) ++ continue; ++ ++ switch (block->end->handler_idx) ++ { ++ case VKD3DSIH_RET: ++ break; ++ ++ case VKD3DSIH_BRANCH: ++ if (vsir_register_is_label(&block->end->src[0].reg)) ++ { ++ if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[0])) < 0) ++ goto fail; ++ } ++ else ++ { ++ if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[1])) < 0) ++ goto fail; ++ ++ if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[2])) < 0) ++ goto fail; ++ } ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ } ++ ++ if (TRACE_ON()) ++ vsir_cfg_dump_dot(cfg); ++ ++ return VKD3D_OK; ++ ++fail: ++ vsir_cfg_cleanup(cfg); ++ ++ return ret; ++} ++ ++/* Block A dominates block B if every path from the entry point to B ++ * must pass through A. Naively compute the set of blocks that are ++ * dominated by `reference' by running a graph visit starting from the ++ * entry point (which must be the initial value of `current') and ++ * avoiding `reference'. Running this for all the blocks takes ++ * quadratic time: if in the future something better is sought after, ++ * the standard tool seems to be the Lengauer-Tarjan algorithm. 
*/ ++static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, struct vsir_block *reference) ++{ ++ size_t i; ++ ++ assert(current->label != 0); ++ ++ if (current == reference) ++ return; ++ ++ if (!bitmap_is_set(reference->dominates, current->label - 1)) ++ return; ++ ++ bitmap_clear(reference->dominates, current->label - 1); ++ ++ for (i = 0; i < current->successors.count; ++i) ++ vsir_cfg_compute_dominators_recurse(current->successors.blocks[i], reference); ++} ++ ++static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) ++{ ++ size_t i, j; ++ ++ for (i = 0; i < cfg->block_count; ++i) ++ { ++ struct vsir_block *block = &cfg->blocks[i]; ++ ++ if (block->label == 0) ++ continue; ++ ++ vsir_cfg_compute_dominators_recurse(cfg->entry, block); ++ ++ if (TRACE_ON()) ++ { ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block %u dominates:", block->label); ++ for (j = 0; j < cfg->block_count; j++) ++ { ++ struct vsir_block *block2 = &cfg->blocks[j]; ++ ++ if (block2->label == 0) ++ continue; ++ ++ if (vsir_block_dominates(block, block2)) ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); ++ } ++ TRACE("%s\n", cfg->debug_buffer.buffer); ++ vkd3d_string_buffer_clear(&cfg->debug_buffer); ++ } ++ } ++} ++ ++/* A back edge is an edge X -> Y for which block Y dominates block ++ * X. All the other edges are forward edges, and it is required that ++ * the input CFG is reducible, i.e., it is acyclic once you strip away ++ * the back edges. ++ * ++ * Each back edge X -> Y defines a loop: block Y is the header block, ++ * block X is the back edge block, and the loop consists of all the ++ * blocks which are dominated by the header block and have a path to ++ * the back edge block that doesn't pass through the header block ++ * (including the header block itself).
It can be proved that all the ++ * blocks in such a path (connecting a loop block to the back edge ++ * block without passing through the header block) belong to the same ++ * loop. ++ * ++ * If the input CFG is reducible its loops are properly nested (i.e., ++ * each two loops are either disjoint or one is contained in the ++ * other), provided that each block has at most one incoming back ++ * edge. If this condition does not hold, a synthetic block can be ++ * introduced as the only back edge block for the given header block, ++ * with all the previous back edge now being forward edges to the ++ * synthetic block. This is not currently implemented (but it is ++ * rarely found in practice anyway). */ ++static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct vsir_block *block, ++ struct vsir_block *header) ++{ ++ enum vkd3d_result ret; ++ size_t i; ++ ++ if ((ret = vsir_block_list_add(loop, block)) < 0) ++ return ret; ++ ++ if (ret == VKD3D_FALSE || block == header) ++ return VKD3D_OK; ++ ++ for (i = 0; i < block->predecessors.count; ++i) ++ { ++ if ((ret = vsir_cfg_scan_loop(loop, block->predecessors.blocks[i], header)) < 0) ++ return ret; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) ++{ ++ size_t i, j, k; ++ ++ if (!(cfg->loops_by_header = vkd3d_calloc(cfg->block_count, sizeof(*cfg->loops_by_header)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ memset(cfg->loops_by_header, 0xff, cfg->block_count * sizeof(*cfg->loops_by_header)); ++ ++ for (i = 0; i < cfg->block_count; ++i) ++ { ++ struct vsir_block *block = &cfg->blocks[i]; ++ ++ if (block->label == 0) ++ continue; ++ ++ for (j = 0; j < block->successors.count; ++j) ++ { ++ struct vsir_block *header = block->successors.blocks[j]; ++ struct vsir_block_list *loop; ++ enum vkd3d_result ret; ++ ++ /* Is this a back edge? 
*/ ++ if (!vsir_block_dominates(header, block)) ++ continue; ++ ++ if (!vkd3d_array_reserve((void **)&cfg->loops, &cfg->loops_capacity, cfg->loops_count + 1, sizeof(*cfg->loops))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ loop = &cfg->loops[cfg->loops_count]; ++ vsir_block_list_init(loop); ++ ++ if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) ++ return ret; ++ ++ vsir_block_list_sort(loop); ++ ++ if (TRACE_ON()) ++ { ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label); ++ ++ for (k = 0; k < loop->count; ++k) ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", loop->blocks[k]->label); ++ ++ TRACE("%s\n", cfg->debug_buffer.buffer); ++ vkd3d_string_buffer_clear(&cfg->debug_buffer); ++ } ++ ++ if (cfg->loops_by_header[header->label - 1] != SIZE_MAX) ++ { ++ FIXME("Block %u is header to more than one loop, this is not implemented.\n", header->label); ++ vkd3d_shader_error(cfg->message_context, &header->begin->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Block %u is header to more than one loop, this is not implemented.", header->label); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ ++ cfg->loops_by_header[header->label - 1] = cfg->loops_count; ++ ++ ++cfg->loops_count; ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ ++struct vsir_cfg_node_sorter ++{ ++ struct vsir_cfg *cfg; ++ struct vsir_cfg_node_sorter_stack_item ++ { ++ struct vsir_block_list *loop; ++ unsigned int seen_count; ++ unsigned int begin; ++ } *stack; ++ size_t stack_count, stack_capacity; ++ struct vsir_block_list available_blocks; ++}; ++ ++/* Topologically sort the blocks according to the forward edges. By ++ * definition if the input CFG is reducible then its forward edges ++ * form a DAG, so a topological sorting exists. In order to compute it ++ * we keep an array with the incoming degree for each block and an ++ * available list of all the blocks whose incoming degree has reached ++ * zero. 
At each step we pick a block from the available list and ++ * strip it away from the graph, updating the incoming degrees and ++ * available list. ++ * ++ * In principle at each step we can pick whatever node we want from ++ * the available list, and will get a topological sort ++ * anyway. However, we use these two criteria to give to the computed ++ * order additional properties: ++ * ++ * 1. we keep track of which loops we're into, and pick blocks ++ * belonging to the current innermost loop, so that loops are kept ++ * contiguous in the order; this can always be done when the input ++ * CFG is reducible; ++ * ++ * 2. subject to the requirement above, we always pick the most ++ * recently added block to the available list, because this tends ++ * to keep related blocks and require fewer control flow ++ * primitives. ++ */ ++static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) ++{ ++ struct vsir_cfg_node_sorter sorter = { .cfg = cfg }; ++ unsigned int *in_degrees = NULL; ++ enum vkd3d_result ret; ++ size_t i; ++ ++ if (!(in_degrees = vkd3d_calloc(cfg->block_count, sizeof(*in_degrees)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ for (i = 0; i < cfg->block_count; ++i) ++ { ++ struct vsir_block *block = &cfg->blocks[i]; ++ ++ if (block->label == 0) ++ { ++ in_degrees[i] = UINT_MAX; ++ continue; ++ } ++ ++ in_degrees[i] = block->predecessors.count; ++ ++ /* Do not count back edges. 
*/ ++ if (cfg->loops_by_header[i] != SIZE_MAX) ++ { ++ assert(in_degrees[i] > 0); ++ in_degrees[i] -= 1; ++ } ++ ++ if (in_degrees[i] == 0 && block != cfg->entry) ++ { ++ WARN("Unexpected entry point %u.\n", block->label); ++ vkd3d_shader_error(cfg->message_context, &block->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, ++ "Block %u is unreachable from the entry point.", block->label); ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ goto fail; ++ } ++ } ++ ++ if (in_degrees[cfg->entry->label - 1] != 0) ++ { ++ WARN("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]); ++ vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, ++ "The entry point block has %u incoming forward edges.", in_degrees[cfg->entry->label - 1]); ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ goto fail; ++ } ++ ++ vsir_block_list_init(&sorter.available_blocks); ++ ++ if ((ret = vsir_block_list_add_checked(&sorter.available_blocks, cfg->entry)) < 0) ++ goto fail; ++ ++ while (sorter.available_blocks.count != 0) ++ { ++ struct vsir_cfg_node_sorter_stack_item *inner_stack_item = NULL; ++ struct vsir_block *block; ++ size_t new_seen_count; ++ ++ if (sorter.stack_count != 0) ++ inner_stack_item = &sorter.stack[sorter.stack_count - 1]; ++ ++ for (i = sorter.available_blocks.count - 1; ; --i) ++ { ++ if (i == SIZE_MAX) ++ { ++ ERR("Couldn't find any viable next block, is the input CFG reducible?\n"); ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ goto fail; ++ } ++ ++ block = sorter.available_blocks.blocks[i]; ++ ++ if (!inner_stack_item || vsir_block_list_search(inner_stack_item->loop, block)) ++ break; ++ } ++ ++ /* If the node is a loop header, open the loop. 
*/ ++ if (sorter.cfg->loops_by_header[block->label - 1] != SIZE_MAX) ++ { ++ struct vsir_block_list *loop = &sorter.cfg->loops[sorter.cfg->loops_by_header[block->label - 1]]; ++ ++ if (loop) ++ { ++ if (!vkd3d_array_reserve((void **)&sorter.stack, &sorter.stack_capacity, ++ sorter.stack_count + 1, sizeof(*sorter.stack))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ inner_stack_item = &sorter.stack[sorter.stack_count++]; ++ inner_stack_item->loop = loop; ++ inner_stack_item->seen_count = 0; ++ inner_stack_item->begin = sorter.cfg->order.count; ++ } ++ } ++ ++ vsir_block_list_remove_index(&sorter.available_blocks, i); ++ block->order_pos = cfg->order.count; ++ if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) ++ goto fail; ++ ++ /* Close loops: since each loop is a strict subset of any ++ * outer loop, we just need to track how many blocks we've ++ * seen; when I close a loop I mark the same number of seen ++ * blocks for the next outer loop. */ ++ new_seen_count = 1; ++ while (sorter.stack_count != 0) ++ { ++ inner_stack_item = &sorter.stack[sorter.stack_count - 1]; ++ ++ inner_stack_item->seen_count += new_seen_count; ++ ++ assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); ++ if (inner_stack_item->seen_count != inner_stack_item->loop->count) ++ break; ++ ++ if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->begin, ++ cfg->order.count, false)) < 0) ++ goto fail; ++ ++ new_seen_count = inner_stack_item->loop->count; ++ --sorter.stack_count; ++ } ++ ++ /* Remove (forward) edges and make new nodes available. 
*/ ++ for (i = 0; i < block->successors.count; ++i) ++ { ++ struct vsir_block *successor = block->successors.blocks[i]; ++ ++ if (vsir_block_dominates(successor, block)) ++ continue; ++ ++ assert(in_degrees[successor->label - 1] > 0); ++ --in_degrees[successor->label - 1]; ++ ++ if (in_degrees[successor->label - 1] == 0) ++ { ++ if ((ret = vsir_block_list_add_checked(&sorter.available_blocks, successor)) < 0) ++ goto fail; ++ } ++ } ++ } ++ ++ if (cfg->order.count != cfg->block_count) ++ { ++ /* There is a cycle of forward edges. */ ++ WARN("The control flow graph is not reducible.\n"); ++ vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, ++ "The control flow graph is not reducible."); ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ goto fail; ++ } ++ ++ assert(sorter.stack_count == 0); ++ ++ vkd3d_free(in_degrees); ++ vkd3d_free(sorter.stack); ++ vsir_block_list_cleanup(&sorter.available_blocks); ++ ++ if (TRACE_ON()) ++ { ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:"); ++ ++ for (i = 0; i < cfg->order.count; ++i) ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label); ++ ++ TRACE("%s\n", cfg->debug_buffer.buffer); ++ vkd3d_string_buffer_clear(&cfg->debug_buffer); ++ } ++ ++ return VKD3D_OK; ++ ++fail: ++ vkd3d_free(in_degrees); ++ vkd3d_free(sorter.stack); ++ vsir_block_list_cleanup(&sorter.available_blocks); ++ ++ return ret; ++} ++ ++/* Sort loop intervals first by ascending begin time and then by ++ * descending end time, so that inner intervals appear after outer ++ * ones and disjoint intervals appear in their proper order. 
*/ ++static int compare_loop_intervals(const void *ptr1, const void *ptr2) ++{ ++ const struct cfg_loop_interval *interval1 = ptr1; ++ const struct cfg_loop_interval *interval2 = ptr2; ++ ++ if (interval1->begin != interval2->begin) ++ return vkd3d_u32_compare(interval1->begin, interval2->begin); ++ ++ return -vkd3d_u32_compare(interval1->end, interval2->end); ++} ++ ++static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) ++{ ++ enum vkd3d_result ret; ++ size_t i, j, k; ++ ++ for (i = 0; i < cfg->block_count; ++i) ++ { ++ struct vsir_block *block = &cfg->blocks[i]; ++ ++ if (block->label == 0) ++ continue; ++ ++ for (j = 0; j < block->successors.count; ++j) ++ { ++ struct vsir_block *successor = block->successors.blocks[j]; ++ struct cfg_loop_interval *extend = NULL; ++ unsigned int begin; ++ enum ++ { ++ ACTION_DO_NOTHING, ++ ACTION_CREATE_NEW, ++ ACTION_EXTEND, ++ } action = ACTION_CREATE_NEW; ++ ++ /* We've already contructed loop intervals for the back ++ * edges, there's nothing more to do. */ ++ if (vsir_block_dominates(successor, block)) ++ continue; ++ ++ assert(block->order_pos < successor->order_pos); ++ ++ /* Jumping from a block to the following one is always ++ * possible, so nothing to do. */ ++ if (block->order_pos + 1 == successor->order_pos) ++ continue; ++ ++ /* Let's look for a loop interval that already breaks at ++ * `successor' and either contains or can be extended to ++ * contain `block'. */ ++ for (k = 0; k < cfg->loop_interval_count; ++k) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; ++ ++ if (interval->end != successor->order_pos) ++ continue; ++ ++ if (interval->begin <= block->order_pos) ++ { ++ action = ACTION_DO_NOTHING; ++ break; ++ } ++ ++ if (interval->synthetic) ++ { ++ action = ACTION_EXTEND; ++ extend = interval; ++ break; ++ } ++ } ++ ++ if (action == ACTION_DO_NOTHING) ++ continue; ++ ++ /* Ok, we have to decide where the new or replacing ++ * interval has to begin. 
These are the rules: 1. it must ++ * begin before `block'; 2. intervals must be properly ++ * nested; 3. the new interval should begin as late as ++ * possible, to limit control flow depth and extension. */ ++ begin = block->order_pos; ++ ++ /* Our candidate interval is always [begin, ++ * successor->order_pos), and we move `begin' backward ++ * until the candidate interval contains all the intervals ++ * whose endpoint lies in the candidate interval ++ * itself. */ ++ for (k = 0; k < cfg->loop_interval_count; ++k) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; ++ ++ if (begin < interval->end && interval->end < successor->order_pos) ++ begin = min(begin, interval->begin); ++ } ++ ++ /* New we have to care about the intervals whose begin ++ * point lies in the candidate interval. We cannot move ++ * the candidate interval endpoint, because it is ++ * important that the loop break target matches ++ * `successor'. So we have to move that interval's begin ++ * point to the begin point of the candidate interval, ++ * i.e. `begin'. But what if the interval we should extend ++ * backward is not synthetic? This cannot happen, ++ * fortunately, because it would mean that there is a jump ++ * entering a loop via a block which is not the loop ++ * header, so the CFG would not be reducible. 
*/ ++ for (k = 0; k < cfg->loop_interval_count; ++k) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; ++ ++ if (interval->begin < successor->order_pos && successor->order_pos < interval->end) ++ { ++ if (interval->synthetic) ++ interval->begin = min(begin, interval->begin); ++ assert(begin >= interval->begin); ++ } ++ } ++ ++ if (action == ACTION_EXTEND) ++ extend->begin = begin; ++ else if ((ret = vsir_cfg_add_loop_interval(cfg, begin, successor->order_pos, true)) < 0) ++ return ret; ++ } ++ } ++ ++ qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); ++ ++ if (TRACE_ON()) ++ for (i = 0; i < cfg->loop_interval_count; ++i) ++ TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ? "Synthetic" : "Natural", ++ cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); ++ ++ return VKD3D_OK; ++} ++ ++struct vsir_cfg_edge_action ++{ ++ enum vsir_cfg_jump_type jump_type; ++ unsigned int target; ++ struct vsir_block *successor; ++}; ++ ++static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block *block, ++ struct vsir_block *successor, struct vsir_cfg_edge_action *action) ++{ ++ unsigned int i; ++ ++ action->target = UINT_MAX; ++ action->successor = successor; ++ ++ if (successor->order_pos <= block->order_pos) ++ { ++ /* The successor is before the current block, so we have to ++ * use `continue'. The target loop is the innermost that ++ * contains the current block and has the successor as ++ * `continue' target. 
*/ ++ for (i = 0; i < cfg->loop_interval_count; ++i) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; ++ ++ if (interval->begin == successor->order_pos && block->order_pos < interval->end) ++ action->target = i; ++ ++ if (interval->begin > successor->order_pos) ++ break; ++ } ++ ++ assert(action->target != UINT_MAX); ++ action->jump_type = JUMP_CONTINUE; ++ } ++ else ++ { ++ /* The successor is after the current block, so we have to use ++ * `break', or possibly just jump to the following block. The ++ * target loop is the outermost that contains the current ++ * block and has the successor as `break' target. */ ++ for (i = 0; i < cfg->loop_interval_count; ++i) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; ++ ++ if (interval->begin <= block->order_pos && interval->end == successor->order_pos) ++ { ++ action->target = i; ++ break; ++ } ++ } ++ ++ if (action->target == UINT_MAX) ++ { ++ assert(successor->order_pos == block->order_pos + 1); ++ action->jump_type = JUMP_NONE; ++ } ++ else ++ { ++ action->jump_type = JUMP_BREAK; ++ } ++ } ++} ++ ++static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) ++{ ++ unsigned int i, stack_depth = 1, open_interval_idx = 0; ++ struct vsir_cfg_structure_list **stack = NULL; ++ ++ /* It's enough to allocate up to the maximum interval stacking ++ * depth (plus one for the full program), but this is simpler. */ ++ if (!(stack = vkd3d_calloc(cfg->loop_interval_count + 1, sizeof(*stack)))) ++ goto fail; ++ cfg->structured_program.end = cfg->order.count; ++ stack[0] = &cfg->structured_program; ++ ++ for (i = 0; i < cfg->order.count; ++i) ++ { ++ struct vsir_block *block = cfg->order.blocks[i]; ++ struct vsir_cfg_structure *structure; ++ ++ assert(stack_depth > 0); ++ ++ /* Open loop intervals. 
*/ ++ while (open_interval_idx < cfg->loop_interval_count) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[open_interval_idx]; ++ ++ if (interval->begin != i) ++ break; ++ ++ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_LOOP))) ++ goto fail; ++ structure->u.loop.idx = open_interval_idx++; ++ ++ structure->u.loop.body.end = interval->end; ++ stack[stack_depth++] = &structure->u.loop.body; ++ } ++ ++ /* Execute the block. */ ++ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_BLOCK))) ++ goto fail; ++ structure->u.block = block; ++ ++ /* Generate between zero and two jump instructions. */ ++ switch (block->end->handler_idx) ++ { ++ case VKD3DSIH_BRANCH: ++ { ++ struct vsir_cfg_edge_action action_true, action_false; ++ bool invert_condition = false; ++ ++ if (vsir_register_is_label(&block->end->src[0].reg)) ++ { ++ unsigned int target = label_from_src_param(&block->end->src[0]); ++ struct vsir_block *successor = &cfg->blocks[target - 1]; ++ ++ vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); ++ action_false = action_true; ++ } ++ else ++ { ++ unsigned int target = label_from_src_param(&block->end->src[1]); ++ struct vsir_block *successor = &cfg->blocks[target - 1]; ++ ++ vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); ++ ++ target = label_from_src_param(&block->end->src[2]); ++ successor = &cfg->blocks[target - 1]; ++ ++ vsir_cfg_compute_edge_action(cfg, block, successor, &action_false); ++ } ++ ++ /* This will happen if the branch is unconditional, ++ * but also if it's conditional with the same target ++ * in both branches, which can happen in some corner ++ * cases, e.g. when converting switch instructions to ++ * selection ladders. 
*/ ++ if (action_true.successor == action_false.successor) ++ { ++ assert(action_true.jump_type == action_false.jump_type); ++ } ++ else ++ { ++ /* At most one branch can just fall through to the ++ * next block, in which case we make sure it's the ++ * false branch. */ ++ if (action_true.jump_type == JUMP_NONE) ++ { ++ invert_condition = true; ++ } ++ else if (stack_depth >= 2) ++ { ++ struct vsir_cfg_structure_list *inner_loop_frame = stack[stack_depth - 2]; ++ struct vsir_cfg_structure *inner_loop = &inner_loop_frame->structures[inner_loop_frame->count - 1]; ++ ++ assert(inner_loop->type == STRUCTURE_TYPE_LOOP); ++ ++ /* Otherwise, if one of the branches is ++ * continueing the inner loop we're inside, ++ * make sure it's the false branch (because it ++ * will be optimized out later). */ ++ if (action_true.jump_type == JUMP_CONTINUE && action_true.target == inner_loop->u.loop.idx) ++ invert_condition = true; ++ } ++ ++ if (invert_condition) ++ { ++ struct vsir_cfg_edge_action tmp = action_true; ++ action_true = action_false; ++ action_false = tmp; ++ } ++ ++ assert(action_true.jump_type != JUMP_NONE); ++ ++ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) ++ goto fail; ++ structure->u.jump.type = action_true.jump_type; ++ structure->u.jump.target = action_true.target; ++ structure->u.jump.condition = &block->end->src[0]; ++ structure->u.jump.invert_condition = invert_condition; ++ } ++ ++ if (action_false.jump_type != JUMP_NONE) ++ { ++ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) ++ goto fail; ++ structure->u.jump.type = action_false.jump_type; ++ structure->u.jump.target = action_false.target; ++ } ++ break; ++ } ++ ++ case VKD3DSIH_RET: ++ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) ++ goto fail; ++ structure->u.jump.type = JUMP_RET; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ /* Close loop 
intervals. */ ++ while (stack_depth > 0) ++ { ++ if (stack[stack_depth - 1]->end != i + 1) ++ break; ++ ++ --stack_depth; ++ } ++ } ++ ++ assert(stack_depth == 0); ++ assert(open_interval_idx == cfg->loop_interval_count); ++ ++ if (TRACE_ON()) ++ vsir_cfg_dump_structured_program(cfg); ++ ++ vkd3d_free(stack); ++ ++ return VKD3D_OK; ++ ++fail: ++ vkd3d_free(stack); ++ ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++} ++ ++static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *list, unsigned int target) ++{ ++ struct vsir_cfg_structure *last = &list->structures[list->count - 1]; ++ ++ if (last->type == STRUCTURE_TYPE_JUMP && last->u.jump.type == JUMP_CONTINUE ++ && !last->u.jump.condition && last->u.jump.target == target) ++ { ++ --list->count; ++ assert(cfg->loop_intervals[target].target_count > 0); ++ --cfg->loop_intervals[target].target_count; ++ } ++} ++ ++static struct vsir_cfg_structure *vsir_cfg_get_trailing_break(struct vsir_cfg_structure_list *list) ++{ ++ struct vsir_cfg_structure *structure; ++ size_t count = list->count; ++ ++ if (count == 0) ++ return NULL; ++ ++ structure = &list->structures[count - 1]; ++ ++ if (structure->type != STRUCTURE_TYPE_JUMP || structure->u.jump.type != JUMP_BREAK ++ || structure->u.jump.condition) ++ return NULL; ++ ++ return structure; ++} ++ ++/* When the last instruction in both branches of a selection construct ++ * is an unconditional break, any of them can be moved after the ++ * selection construct. If they break the same loop both of them can ++ * be moved out, otherwise we can choose which one: we choose the one ++ * that breaks the innermost loop, because we hope to eventually ++ * remove the loop itself. ++ * ++ * In principle a similar movement could be done when the last ++ * instructions are continue and continue, or continue and break. But ++ * in practice I don't think those situations can happen given the ++ * previous passes we do on the program, so we don't care. 
*/ ++static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *list) ++{ ++ struct vsir_cfg_structure *selection, *if_break, *else_break, *new_break; ++ unsigned int if_target, else_target, max_target; ++ size_t pos = list->count - 1; ++ ++ selection = &list->structures[pos]; ++ assert(selection->type == STRUCTURE_TYPE_SELECTION); ++ ++ if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body); ++ else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body); ++ ++ if (!if_break || !else_break) ++ return VKD3D_OK; ++ ++ if_target = if_break->u.jump.target; ++ else_target = else_break->u.jump.target; ++ max_target = max(if_target, else_target); ++ ++ if (!(new_break = vsir_cfg_structure_list_append(list, STRUCTURE_TYPE_JUMP))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ new_break->u.jump.type = JUMP_BREAK; ++ new_break->u.jump.target = max_target; ++ ++cfg->loop_intervals[max_target].target_count; ++ ++ /* Pointer `selection' could have been invalidated by the append ++ * operation. */ ++ selection = &list->structures[pos]; ++ assert(selection->type == STRUCTURE_TYPE_SELECTION); ++ ++ if (if_target == max_target) ++ { ++ --selection->u.selection.if_body.count; ++ assert(cfg->loop_intervals[if_target].target_count > 0); ++ --cfg->loop_intervals[if_target].target_count; ++ } ++ ++ if (else_target == max_target) ++ { ++ --selection->u.selection.else_body.count; ++ assert(cfg->loop_intervals[else_target].target_count > 0); ++ --cfg->loop_intervals[else_target].target_count; ++ } ++ ++ /* If a branch becomes empty, make it the else branch, so we save a block. 
*/ ++ if (selection->u.selection.if_body.count == 0) ++ { ++ struct vsir_cfg_structure_list tmp; ++ ++ selection->u.selection.invert_condition = !selection->u.selection.invert_condition; ++ tmp = selection->u.selection.if_body; ++ selection->u.selection.if_body = selection->u.selection.else_body; ++ selection->u.selection.else_body = tmp; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections_recursively(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *list) ++{ ++ struct vsir_cfg_structure *trailing; ++ ++ if (list->count == 0) ++ return VKD3D_OK; ++ ++ trailing = &list->structures[list->count - 1]; ++ ++ if (trailing->type != STRUCTURE_TYPE_SELECTION) ++ return VKD3D_OK; ++ ++ vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.if_body); ++ vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.else_body); ++ ++ return vsir_cfg_move_breaks_out_of_selections(cfg, list); ++} ++ ++static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *list) ++{ ++ enum vkd3d_result ret; ++ size_t i; ++ ++ for (i = 0; i < list->count; ++i) ++ { ++ struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; ++ ++ if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) ++ continue; ++ ++ vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); ++ new_selection.u.selection.condition = structure->u.jump.condition; ++ new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; ++ ++ if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, ++ STRUCTURE_TYPE_JUMP))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ new_jump->u.jump.type = structure->u.jump.type; ++ new_jump->u.jump.target = structure->u.jump.target; ++ ++ /* Move the rest of the structure list in the else branch ++ * rather than leaving it after the selection construct. 
The ++ * reason is that this is more conducive to further ++ * optimization, because all the conditional `break's appear ++ * as the last instruction of a branch of a cascade of ++ * selection constructs at the end of the structure list we're ++ * processing, instead of being buried in the middle of the ++ * structure list itself. */ ++ if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, ++ &list->structures[i + 1], list->count - i - 1)) < 0) ++ return ret; ++ ++ *structure = new_selection; ++ list->count = i + 1; ++ ++ if ((ret = vsir_cfg_synthesize_selections(cfg, &structure->u.selection.else_body)) < 0) ++ return ret; ++ ++ if ((ret = vsir_cfg_move_breaks_out_of_selections(cfg, list)) < 0) ++ return ret; ++ ++ break; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *new_list, struct vsir_cfg_structure *loop) ++{ ++ struct vsir_cfg_structure_list *loop_body = &loop->u.loop.body; ++ unsigned int target, loop_idx = loop->u.loop.idx; ++ struct vsir_cfg_structure *trailing_break; ++ enum vkd3d_result ret; ++ ++ trailing_break = vsir_cfg_get_trailing_break(loop_body); ++ ++ /* If the loop's last instruction is not a break, we cannot remove ++ * the loop itself. */ ++ if (!trailing_break) ++ { ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) ++ return ret; ++ memset(loop, 0, sizeof(*loop)); ++ return VKD3D_OK; ++ } ++ ++ target = trailing_break->u.jump.target; ++ assert(cfg->loop_intervals[target].target_count > 0); ++ ++ /* If the loop is not targeted by any jump, we can remove it. The ++ * trailing `break' then targets another loop, so we have to keep ++ * it. 
*/ ++ if (cfg->loop_intervals[loop_idx].target_count == 0) ++ { ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, ++ &loop_body->structures[0], loop_body->count)) < 0) ++ return ret; ++ loop_body->count = 0; ++ return VKD3D_OK; ++ } ++ ++ /* If the loop is targeted only by its own trailing `break' ++ * instruction, then we can remove it together with the `break' ++ * itself. */ ++ if (target == loop_idx && cfg->loop_intervals[loop_idx].target_count == 1) ++ { ++ --cfg->loop_intervals[loop_idx].target_count; ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, ++ &loop_body->structures[0], loop_body->count - 1)) < 0) ++ return ret; ++ loop_body->count = 0; ++ return VKD3D_OK; ++ } ++ ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) ++ return ret; ++ memset(loop, 0, sizeof(*loop)); ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) ++{ ++ struct vsir_cfg_structure_list old_list = *list, *new_list = list; ++ enum vkd3d_result ret; ++ size_t i; ++ ++ memset(new_list, 0, sizeof(*new_list)); ++ ++ for (i = 0; i < old_list.count; ++i) ++ { ++ struct vsir_cfg_structure *loop = &old_list.structures[i], *selection; ++ struct vsir_cfg_structure_list *loop_body; ++ ++ if (loop->type != STRUCTURE_TYPE_LOOP) ++ { ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) ++ goto out; ++ memset(loop, 0, sizeof(*loop)); ++ continue; ++ } ++ ++ loop_body = &loop->u.loop.body; ++ ++ if (loop_body->count == 0) ++ { ++ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) ++ goto out; ++ memset(loop, 0, sizeof(*loop)); ++ continue; ++ } ++ ++ vsir_cfg_remove_trailing_continue(cfg, loop_body, loop->u.loop.idx); ++ ++ if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0) ++ goto out; ++ ++ if ((ret = vsir_cfg_synthesize_selections(cfg, loop_body)) < 0) ++ goto out; ++ ++ if ((ret = 
vsir_cfg_append_loop(cfg, new_list, loop)) < 0) ++ goto out; ++ ++ /* If the last pushed instruction is a selection and one of the branches terminates with a ++ * `break', start pushing to the other branch, in the hope of eventually push a `break' ++ * there too and be able to remove a loop. */ ++ if (new_list->count == 0) ++ continue; ++ ++ selection = &new_list->structures[new_list->count - 1]; ++ ++ if (selection->type == STRUCTURE_TYPE_SELECTION) ++ { ++ if (vsir_cfg_get_trailing_break(&selection->u.selection.if_body)) ++ new_list = &selection->u.selection.else_body; ++ else if (vsir_cfg_get_trailing_break(&selection->u.selection.else_body)) ++ new_list = &selection->u.selection.if_body; ++ } ++ } ++ ++ ret = vsir_cfg_move_breaks_out_of_selections_recursively(cfg, list); ++ ++out: ++ vsir_cfg_structure_list_cleanup(&old_list); ++ ++ return ret; ++} ++ ++static void vsir_cfg_count_targets(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) ++{ ++ size_t i; ++ ++ for (i = 0; i < list->count; ++i) ++ { ++ struct vsir_cfg_structure *structure = &list->structures[i]; ++ ++ switch (structure->type) ++ { ++ case STRUCTURE_TYPE_BLOCK: ++ break; ++ ++ case STRUCTURE_TYPE_LOOP: ++ vsir_cfg_count_targets(cfg, &structure->u.loop.body); ++ break; ++ ++ case STRUCTURE_TYPE_SELECTION: ++ vsir_cfg_count_targets(cfg, &structure->u.selection.if_body); ++ vsir_cfg_count_targets(cfg, &structure->u.selection.else_body); ++ break; ++ ++ case STRUCTURE_TYPE_JUMP: ++ if (structure->u.jump.type == JUMP_BREAK || structure->u.jump.type == JUMP_CONTINUE) ++ ++cfg->loop_intervals[structure->u.jump.target].target_count; ++ break; ++ } ++ } ++} ++ ++/* Trampolines are code gadgets used to emulate multilevel jumps (which are not natively supported ++ * by SPIR-V). A trampoline is inserted just after a loop and checks whether control has reached the ++ * intended site (i.e., we just jumped out of the target block) or if other levels of jumping are ++ * needed. 
For each jump a trampoline is required for all the loops between the jump itself and the ++ * target loop, excluding the target loop itself. */ ++static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, ++ struct vsir_cfg_structure *loop) ++{ ++ size_t i; ++ ++ for (i = 0; i < list->count; ++i) ++ { ++ struct vsir_cfg_structure *structure = &list->structures[i]; ++ ++ switch (structure->type) ++ { ++ case STRUCTURE_TYPE_BLOCK: ++ break; ++ ++ case STRUCTURE_TYPE_LOOP: ++ structure->u.loop.outer_loop = loop; ++ vsir_cfg_mark_trampolines(cfg, &structure->u.loop.body, structure); ++ break; ++ ++ case STRUCTURE_TYPE_SELECTION: ++ vsir_cfg_mark_trampolines(cfg, &structure->u.selection.if_body, loop); ++ vsir_cfg_mark_trampolines(cfg, &structure->u.selection.else_body, loop); ++ break; ++ ++ case STRUCTURE_TYPE_JUMP: ++ { ++ struct vsir_cfg_structure *l; ++ if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) ++ break; ++ for (l = loop; l && l->u.loop.idx != structure->u.jump.target; l = l->u.loop.outer_loop) ++ { ++ assert(l->type == STRUCTURE_TYPE_LOOP); ++ l->u.loop.needs_trampoline = true; ++ } ++ break; ++ } ++ } ++ } ++} ++ ++/* Launchers are the counterpart of trampolines. A launcher is inserted just before a jump, and ++ * writes in a well-known variable what is the target of the jump. Trampolines will then read that ++ * variable to decide how to redirect the jump to its intended target. A launcher is needed each ++ * time the innermost loop containing the jump itself has a trampoline (independently of whether the ++ * jump is targeting that loop or not). 
*/ ++static void vsir_cfg_mark_launchers(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, ++ struct vsir_cfg_structure *loop) ++{ ++ size_t i; ++ ++ for (i = 0; i < list->count; ++i) ++ { ++ struct vsir_cfg_structure *structure = &list->structures[i]; ++ ++ switch (structure->type) ++ { ++ case STRUCTURE_TYPE_BLOCK: ++ break; ++ ++ case STRUCTURE_TYPE_LOOP: ++ vsir_cfg_mark_launchers(cfg, &structure->u.loop.body, structure); ++ break; ++ ++ case STRUCTURE_TYPE_SELECTION: ++ vsir_cfg_mark_launchers(cfg, &structure->u.selection.if_body, loop); ++ vsir_cfg_mark_launchers(cfg, &structure->u.selection.else_body, loop); ++ break; ++ ++ case STRUCTURE_TYPE_JUMP: ++ if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) ++ break; ++ assert(loop && loop->type == STRUCTURE_TYPE_LOOP); ++ if (loop->u.loop.needs_trampoline) ++ structure->u.jump.needs_launcher = true; ++ break; ++ } ++ } ++} ++ ++static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) ++{ ++ enum vkd3d_result ret; ++ ++ vsir_cfg_count_targets(cfg, &cfg->structured_program); ++ ++ ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program); ++ ++ /* Trampolines and launchers cannot be marked with the same pass, ++ * because a jump might have to be marked as launcher even when it ++ * targets its innermost loop, if other jumps in the same loop ++ * need a trampoline anyway. So launchers can be discovered only ++ * once all the trampolines are known. 
*/ ++ vsir_cfg_mark_trampolines(cfg, &cfg->structured_program, NULL); ++ vsir_cfg_mark_launchers(cfg, &cfg->structured_program, NULL); ++ ++ if (TRACE_ON()) ++ vsir_cfg_dump_structured_program(cfg); ++ ++ return ret; ++} ++ ++static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *list, unsigned int loop_idx); ++ ++static enum vkd3d_result vsir_cfg_structure_list_emit_block(struct vsir_cfg *cfg, ++ struct vsir_block *block) ++{ ++ struct vsir_cfg_emit_target *target = cfg->target; ++ ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, ++ target->ins_count + (block->end - block->begin))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ memcpy(&target->instructions[target->ins_count], block->begin, ++ (char *)block->end - (char *)block->begin); ++ ++ target->ins_count += block->end - block->begin; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_loop *loop, unsigned int loop_idx) ++{ ++ struct vsir_cfg_emit_target *target = cfg->target; ++ const struct vkd3d_shader_location no_loc = {0}; ++ enum vkd3d_result ret; ++ ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_LOOP); ++ ++ if ((ret = vsir_cfg_structure_list_emit(cfg, &loop->body, loop->idx)) < 0) ++ return ret; ++ ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 5)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); ++ ++ /* Add a trampoline to implement multilevel jumping depending on the stored ++ * jump_target value. 
*/ ++ if (loop->needs_trampoline) ++ { ++ /* If the multilevel jump is a `continue' and the target is the loop we're inside ++ * right now, then we can finally do the `continue'. */ ++ const unsigned int outer_continue_target = loop_idx << 1 | 1; ++ /* If the multilevel jump is a `continue' to any other target, or if it is a `break' ++ * and the target is not the loop we just finished emitting, then it means that ++ * we have to reach an outer loop, so we keep breaking. */ ++ const unsigned int inner_break_target = loop->idx << 1; ++ ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, VKD3DSIH_IEQ, 1, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); ++ src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); ++ src_param_init_const_uint(&target->instructions[target->ins_count].src[1], outer_continue_target); ++ ++ ++target->ins_count; ++ ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); ++ ++ ++target->ins_count; ++ ++target->temp_count; ++ ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, VKD3DSIH_IEQ, 1, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); ++ src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); ++ src_param_init_const_uint(&target->instructions[target->ins_count].src[1], inner_break_target); ++ ++ ++target->ins_count; ++ ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, 
VKD3DSIH_BREAKP, 0, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; ++ ++ src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); ++ ++ ++target->ins_count; ++ ++target->temp_count; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_cfg_structure_list_emit_selection(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_selection *selection, unsigned int loop_idx) ++{ ++ struct vsir_cfg_emit_target *target = cfg->target; ++ const struct vkd3d_shader_location no_loc = {0}; ++ enum vkd3d_result ret; ++ ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, VKD3DSIH_IF, 0, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ target->instructions[target->ins_count].src[0] = *selection->condition; ++ ++ if (selection->invert_condition) ++ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; ++ ++ ++target->ins_count; ++ ++ if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->if_body, loop_idx)) < 0) ++ return ret; ++ ++ if (selection->else_body.count != 0) ++ { ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ELSE); ++ ++ if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->else_body, loop_idx)) < 0) ++ return ret; ++ } ++ ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDIF); ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct 
vsir_cfg *cfg, ++ struct vsir_cfg_structure_jump *jump, unsigned int loop_idx) ++{ ++ struct vsir_cfg_emit_target *target = cfg->target; ++ const struct vkd3d_shader_location no_loc = {0}; ++ /* Encode the jump target as the loop index plus a bit to remember whether ++ * we're breaking or continueing. */ ++ unsigned int jump_target = jump->target << 1; ++ enum vkd3d_shader_opcode opcode; ++ ++ switch (jump->type) ++ { ++ case JUMP_CONTINUE: ++ /* If we're continueing the loop we're directly inside, then we can emit a ++ * `continue'. Otherwise we first have to break all the loops between here ++ * and the loop to continue, recording our intention to continue ++ * in the lowest bit of jump_target. */ ++ if (jump->target == loop_idx) ++ { ++ opcode = jump->condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; ++ break; ++ } ++ jump_target |= 1; ++ /* fall through */ ++ ++ case JUMP_BREAK: ++ opcode = jump->condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; ++ break; ++ ++ case JUMP_RET: ++ assert(!jump->condition); ++ opcode = VKD3DSIH_RET; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ if (jump->needs_launcher) ++ { ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, VKD3DSIH_MOV, 1, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ dst_param_init_temp_uint(&target->instructions[target->ins_count].dst[0], target->jump_target_temp_idx); ++ src_param_init_const_uint(&target->instructions[target->ins_count].src[0], jump_target); ++ ++ ++target->ins_count; ++ } ++ ++ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], ++ &no_loc, opcode, 0, !!jump->condition)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ if (jump->invert_condition) ++ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; ++ ++ if 
(jump->condition) ++ target->instructions[target->ins_count].src[0] = *jump->condition; ++ ++ ++target->ins_count; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, ++ struct vsir_cfg_structure_list *list, unsigned int loop_idx) ++{ ++ enum vkd3d_result ret; ++ size_t i; ++ ++ for (i = 0; i < list->count; ++i) ++ { ++ struct vsir_cfg_structure *structure = &list->structures[i]; ++ ++ switch (structure->type) ++ { ++ case STRUCTURE_TYPE_BLOCK: ++ if ((ret = vsir_cfg_structure_list_emit_block(cfg, structure->u.block)) < 0) ++ return ret; ++ break; ++ ++ case STRUCTURE_TYPE_LOOP: ++ if ((ret = vsir_cfg_structure_list_emit_loop(cfg, &structure->u.loop, loop_idx)) < 0) ++ return ret; ++ break; ++ ++ case STRUCTURE_TYPE_SELECTION: ++ if ((ret = vsir_cfg_structure_list_emit_selection(cfg, &structure->u.selection, ++ loop_idx)) < 0) ++ return ret; ++ break; ++ ++ case STRUCTURE_TYPE_JUMP: ++ if ((ret = vsir_cfg_structure_list_emit_jump(cfg, &structure->u.jump, ++ loop_idx)) < 0) ++ return ret; + break; + + default: + vkd3d_unreachable(); + } +- +- TRACE(" n%u [label=\"%u\", shape=\"%s\"];\n", block->label, block->label, shape); +- +- for (j = 0; j < block->successors.count; ++j) +- TRACE(" n%u -> n%u;\n", block->label, block->successors.blocks[j]->label); + } + +- TRACE("}\n"); ++ return VKD3D_OK; + } + +-static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program) ++static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) ++{ ++ return vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX); ++} ++ ++static enum vkd3d_result vsir_program_structurize_function(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) + { +- struct vsir_block *current_block = NULL; + enum vkd3d_result ret; +- size_t i; ++ struct vsir_cfg cfg; + +- memset(cfg, 0, sizeof(*cfg)); +- cfg->program = program; 
+- cfg->block_count = program->block_count; ++ if ((ret = vsir_cfg_init(&cfg, program, message_context, target)) < 0) ++ return ret; + +- if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ vsir_cfg_compute_dominators(&cfg); + +- for (i = 0; i < program->instructions.count; ++i) +- { +- struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; ++ if ((ret = vsir_cfg_compute_loops(&cfg)) < 0) ++ goto out; + +- switch (instruction->handler_idx) +- { +- case VKD3DSIH_PHI: +- case VKD3DSIH_SWITCH_MONOLITHIC: +- vkd3d_unreachable(); ++ if ((ret = vsir_cfg_sort_nodes(&cfg)) < 0) ++ goto out; + +- case VKD3DSIH_LABEL: +- { +- unsigned int label = label_from_src_param(&instruction->src[0]); ++ if ((ret = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) ++ goto out; + +- assert(!current_block); +- assert(label > 0); +- assert(label <= cfg->block_count); +- current_block = &cfg->blocks[label - 1]; +- assert(current_block->label == 0); +- if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0) +- goto fail; +- current_block->begin = &program->instructions.elements[i + 1]; +- if (!cfg->entry) +- cfg->entry = current_block; +- break; +- } ++ if ((ret = vsir_cfg_build_structured_program(&cfg)) < 0) ++ goto out; + +- case VKD3DSIH_BRANCH: +- case VKD3DSIH_RET: +- assert(current_block); +- current_block->end = instruction; +- current_block = NULL; +- break; ++ if ((ret = vsir_cfg_optimize(&cfg)) < 0) ++ goto out; + +- default: +- break; +- } +- } ++ ret = vsir_cfg_emit_structured_program(&cfg); + +- for (i = 0; i < cfg->block_count; ++i) +- { +- struct vsir_block *block = &cfg->blocks[i]; ++out: ++ vsir_cfg_cleanup(&cfg); + +- if (block->label == 0) +- continue; ++ return ret; ++} + +- switch (block->end->handler_idx) +- { +- case VKD3DSIH_RET: +- break; ++static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, ++ struct 
vkd3d_shader_message_context *message_context) ++{ ++ struct vsir_cfg_emit_target target = {0}; ++ enum vkd3d_result ret; ++ unsigned int i; + +- case VKD3DSIH_BRANCH: +- if (vsir_register_is_label(&block->end->src[0].reg)) +- { +- if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[0])) < 0) +- goto fail; +- } +- else +- { +- if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[1])) < 0) +- goto fail; ++ target.jump_target_temp_idx = program->temp_count; ++ target.temp_count = program->temp_count + 1; + +- if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[2])) < 0) +- goto fail; +- } +- break; ++ if (!reserve_instructions(&target.instructions, &target.ins_capacity, program->instructions.count)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- default: +- vkd3d_unreachable(); +- } ++ /* Copy declarations until the first block. */ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ ++ if (ins->handler_idx == VKD3DSIH_LABEL) ++ break; ++ ++ target.instructions[target.ins_count++] = *ins; + } + +- if (TRACE_ON()) +- vsir_cfg_dump_dot(cfg); ++ if ((ret = vsir_program_structurize_function(program, message_context, &target)) < 0) ++ goto fail; ++ ++ vkd3d_free(program->instructions.elements); ++ program->instructions.elements = target.instructions; ++ program->instructions.capacity = target.ins_capacity; ++ program->instructions.count = target.ins_count; ++ program->temp_count = target.temp_count; + + return VKD3D_OK; + + fail: +- vsir_cfg_cleanup(cfg); ++ vkd3d_free(target.instructions); + + return ret; + } + +-/* Block A dominates block B if every path from the entry point to B +- * must pass through A. Naively compute the set of blocks that are +- * dominated by `reference' by running a graph visit starting from the +- * entry point (which must be the initial value of `current') and +- * avoiding `reference'. 
Running this for all the blocks takes +- * quadratic time: if in the future something better is sought after, +- * the standard tool seems to be the Lengauer-Tarjan algorithm. */ +-static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, struct vsir_block *reference) ++static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc, ++ struct vsir_block *block, struct vsir_block **origin_blocks) + { +- size_t i; +- +- assert(current->label != 0); +- +- if (current == reference) +- return; ++ unsigned int i; + +- if (!bitmap_is_set(reference->dominates, current->label - 1)) ++ if (!register_is_ssa(reg)) + return; + +- bitmap_clear(reference->dominates, current->label - 1); ++ i = reg->idx[0].offset; ++ if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) ++ alloc->table[i] = alloc->next_temp_idx++; + +- for (i = 0; i < current->successors.count; ++i) +- vsir_cfg_compute_dominators_recurse(current->successors.blocks[i], reference); ++ for (i = 0; i < reg->idx_count; ++i) ++ if (reg->idx[i].rel_addr) ++ register_map_undominated_use(®->idx[i].rel_addr->reg, alloc, block, origin_blocks); + } + +-static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) ++/* Drivers are not necessarily optimised to handle very large numbers of temps. For example, ++ * using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV. ++ * This can also result in the backend emitting less code because temps typically need an ++ * access chain and a load/store. Conversion of phi SSA values to temps should eliminate all ++ * undominated SSA use, but structurisation may create new occurrences. 
*/ ++static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg) + { +- struct vkd3d_string_buffer buf; +- size_t i, j; ++ struct vsir_program *program = cfg->program; ++ struct ssas_to_temps_alloc alloc = {0}; ++ struct vsir_block **origin_blocks; ++ unsigned int j; ++ size_t i; + +- if (TRACE_ON()) +- vkd3d_string_buffer_init(&buf); ++ if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks)))) ++ { ++ ERR("Failed to allocate origin block array.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) ++ { ++ vkd3d_free(origin_blocks); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; ++ struct vkd3d_shader_instruction *ins; + +- if (block->label == 0) +- continue; +- +- vsir_cfg_compute_dominators_recurse(cfg->entry, block); +- +- if (TRACE_ON()) ++ for (ins = block->begin; ins <= block->end; ++ins) + { +- vkd3d_string_buffer_printf(&buf, "Block %u dominates:", block->label); +- for (j = 0; j < cfg->block_count; j++) ++ for (j = 0; j < ins->dst_count; ++j) + { +- struct vsir_block *block2 = &cfg->blocks[j]; +- +- if (block2->label == 0) +- continue; +- +- if (bitmap_is_set(block->dominates, j)) +- vkd3d_string_buffer_printf(&buf, " %u", block2->label); ++ if (register_is_ssa(&ins->dst[j].reg)) ++ origin_blocks[ins->dst[j].reg.idx[0].offset] = block; + } +- TRACE("%s\n", buf.buffer); +- vkd3d_string_buffer_clear(&buf); + } + } + +- if (TRACE_ON()) +- vkd3d_string_buffer_cleanup(&buf); +-} +- +-enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_compile_info *compile_info) +-{ +- struct vkd3d_shader_instruction_array *instructions = &parser->program.instructions; +- enum vkd3d_result result = VKD3D_OK; +- +- remove_dcl_temps(&parser->program); +- +- if ((result = 
instruction_array_lower_texkills(parser)) < 0) +- return result; +- +- if (parser->shader_desc.is_dxil) ++ for (i = 0; i < cfg->block_count; ++i) + { +- struct vsir_cfg cfg; +- +- if ((result = lower_switch_to_if_ladder(&parser->program)) < 0) +- return result; +- +- if ((result = materialize_ssas_to_temps(parser)) < 0) +- return result; +- +- if ((result = vsir_cfg_init(&cfg, &parser->program)) < 0) +- return result; +- +- vsir_cfg_compute_dominators(&cfg); ++ struct vsir_block *block = &cfg->blocks[i]; ++ struct vkd3d_shader_instruction *ins; + +- if ((result = simple_structurizer_run(parser)) < 0) ++ for (ins = block->begin; ins <= block->end; ++ins) + { +- vsir_cfg_cleanup(&cfg); +- return result; ++ for (j = 0; j < ins->src_count; ++j) ++ register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks); + } +- +- vsir_cfg_cleanup(&cfg); + } +- else +- { +- if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_PIXEL) +- { +- if ((result = remap_output_signature(parser, compile_info)) < 0) +- return result; +- } +- +- if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_HULL) +- { +- if ((result = instruction_array_flatten_hull_shader_phases(instructions)) < 0) +- return result; + +- if ((result = instruction_array_normalise_hull_shader_control_point_io(instructions, +- &parser->shader_desc.input_signature)) < 0) +- return result; +- } ++ if (alloc.next_temp_idx == program->temp_count) ++ goto done; + +- if ((result = shader_normalise_io_registers(parser)) < 0) +- return result; ++ TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count); + +- if ((result = instruction_array_normalise_flat_constants(&parser->program)) < 0) +- return result; ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- remove_dead_code(&parser->program); ++ for (j = 0; j < ins->dst_count; ++j) ++ 
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); + +- if ((result = normalise_combined_samplers(parser)) < 0) +- return result; ++ for (j = 0; j < ins->src_count; ++j) ++ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); + } + +- if ((result = flatten_control_flow_constructs(parser)) < 0) +- return result; ++ program->temp_count = alloc.next_temp_idx; ++done: ++ vkd3d_free(origin_blocks); ++ vkd3d_free(alloc.table); + +- if (TRACE_ON()) +- vkd3d_shader_trace(&parser->program); ++ return VKD3D_OK; ++} + +- if (!parser->failed && (result = vsir_validate(parser)) < 0) +- return result; ++static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ enum vkd3d_result ret; ++ struct vsir_cfg cfg; ++ ++ if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL)) < 0) ++ return ret; + +- if (parser->failed) +- result = VKD3D_ERROR_INVALID_SHADER; ++ vsir_cfg_compute_dominators(&cfg); + +- return result; ++ ret = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg); ++ ++ vsir_cfg_cleanup(&cfg); ++ ++ return ret; + } + + struct validation_context + { +- struct vkd3d_shader_parser *parser; ++ struct vkd3d_shader_message_context *message_context; + const struct vsir_program *program; + size_t instruction_idx; ++ struct vkd3d_shader_location null_location; + bool invalid_instruction_idx; ++ enum vkd3d_result status; + bool dcl_temps_found; + enum vkd3d_shader_opcode phase; + enum cf_type +@@ -3452,16 +5106,21 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c + + if (ctx->invalid_instruction_idx) + { +- vkd3d_shader_parser_error(ctx->parser, error, "%s", buf.buffer); ++ vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); + ERR("VSIR validation error: %s\n", buf.buffer); + } + else + { +- vkd3d_shader_parser_error(ctx->parser, error, "instruction %zu: 
%s", ctx->instruction_idx + 1, buf.buffer); ++ const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; ++ vkd3d_shader_error(ctx->message_context, &ins->location, error, ++ "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); + ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); + } + + vkd3d_string_buffer_cleanup(&buf); ++ ++ if (!ctx->status) ++ ctx->status = VKD3D_ERROR_INVALID_SHADER; + } + + static void vsir_validate_src_param(struct validation_context *ctx, +@@ -3515,10 +5174,10 @@ static void vsir_validate_register(struct validation_context *ctx, + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register."); + +- if (reg->idx[0].offset >= ctx->parser->program.temp_count) ++ if (reg->idx[0].offset >= ctx->program->temp_count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.", +- reg->idx[0].offset, ctx->parser->program.temp_count); ++ reg->idx[0].offset, ctx->program->temp_count); + break; + } + +@@ -3606,7 +5265,7 @@ static void vsir_validate_register(struct validation_context *ctx, + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.", + reg->precision); + +- if (reg->data_type != VKD3D_DATA_UINT) ++ if (reg->data_type != VKD3D_DATA_UNUSED) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.", + reg->data_type); + +@@ -3708,7 +5367,7 @@ static void vsir_validate_dst_param(struct validation_context *ctx, + switch (dst->reg.type) + { + case VKD3DSPR_SSA: +- if (dst->reg.idx[0].offset < ctx->parser->program.ssa_count) ++ if (dst->reg.idx[0].offset < ctx->program->ssa_count) + { + struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; + +@@ -3761,7 +5420,7 @@ static void 
vsir_validate_src_param(struct validation_context *ctx, + switch (src->reg.type) + { + case VKD3DSPR_SSA: +- if (src->reg.idx[0].offset < ctx->parser->program.ssa_count) ++ if (src->reg.idx[0].offset < ctx->program->ssa_count) + { + struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; + unsigned int i; +@@ -3852,7 +5511,6 @@ static void vsir_validate_instruction(struct validation_context *ctx) + size_t i; + + instruction = &ctx->program->instructions.elements[ctx->instruction_idx]; +- ctx->parser->location = instruction->location; + + for (i = 0; i < instruction->dst_count; ++i) + vsir_validate_dst_param(ctx, &instruction->dst[i]); +@@ -3884,6 +5542,46 @@ static void vsir_validate_instruction(struct validation_context *ctx) + ctx->dcl_temps_found = false; + return; + ++ case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: ++ /* Exclude non-finite values. */ ++ if (!(instruction->declaration.max_tessellation_factor >= 1.0f ++ && instruction->declaration.max_tessellation_factor <= 64.0f)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Max tessellation factor %f is invalid.", ++ instruction->declaration.max_tessellation_factor); ++ return; ++ ++ /* The DXIL parser can generate these outside phases, but this is not an issue. 
*/ ++ case VKD3DSIH_DCL_INPUT: ++ case VKD3DSIH_DCL_OUTPUT: ++ return; ++ ++ case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: ++ if (!instruction->declaration.count || instruction->declaration.count > 32) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.", ++ instruction->declaration.count); ++ return; ++ ++ case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: ++ if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID ++ || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); ++ return; ++ ++ case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: ++ if (!instruction->declaration.tessellator_output_primitive ++ || instruction->declaration.tessellator_output_primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "Tessellator output primitive %#x is invalid.", instruction->declaration.tessellator_output_primitive); ++ return; ++ ++ case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: ++ if (!instruction->declaration.tessellator_partitioning ++ || instruction->declaration.tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "Tessellator partitioning %#x is invalid.", instruction->declaration.tessellator_partitioning); ++ return; ++ + default: + break; + } +@@ -4203,17 +5901,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) + } + } + +-enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) ++enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, ++ const char *source_name, struct vkd3d_shader_message_context *message_context) + { + struct validation_context ctx = + { +- .parser = 
parser, +- .program = &parser->program, ++ .message_context = message_context, ++ .program = program, ++ .null_location = {.source_name = source_name}, ++ .status = VKD3D_OK, + .phase = VKD3DSIH_INVALID, + }; + unsigned int i; + +- if (!(parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) ++ if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) + return VKD3D_OK; + + if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) +@@ -4222,7 +5923,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) + if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) + goto fail; + +- for (ctx.instruction_idx = 0; ctx.instruction_idx < parser->program.instructions.count; ++ctx.instruction_idx) ++ for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx) + vsir_validate_instruction(&ctx); + + ctx.invalid_instruction_idx = true; +@@ -4247,7 +5948,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) + vkd3d_free(ctx.temps); + vkd3d_free(ctx.ssas); + +- return VKD3D_OK; ++ return ctx.status; + + fail: + vkd3d_free(ctx.blocks); +@@ -4256,3 +5957,72 @@ fail: + + return VKD3D_ERROR_OUT_OF_MEMORY; + } ++ ++enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) ++{ ++ enum vkd3d_result result = VKD3D_OK; ++ ++ if ((result = vsir_program_lower_instructions(program)) < 0) ++ return result; ++ ++ if (program->shader_version.major >= 6) ++ { ++ if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) ++ return result; ++ ++ if ((result = lower_switch_to_if_ladder(program)) < 0) ++ return result; ++ ++ if ((result = vsir_program_structurize(program, message_context)) < 0) ++ return result; ++ ++ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) 
++ return result; ++ ++ if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0) ++ return result; ++ } ++ else ++ { ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ { ++ if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) ++ return result; ++ } ++ ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) ++ { ++ if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) ++ return result; ++ ++ if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, ++ &program->input_signature)) < 0) ++ return result; ++ } ++ ++ if ((result = vsir_program_normalise_io_registers(program)) < 0) ++ return result; ++ ++ if ((result = instruction_array_normalise_flat_constants(program)) < 0) ++ return result; ++ ++ remove_dead_code(program); ++ ++ if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) ++ return result; ++ ++ if (compile_info->target_type != VKD3D_SHADER_TARGET_GLSL ++ && (result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) ++ return result; ++ } ++ ++ if (TRACE_ON()) ++ vkd3d_shader_trace(program); ++ ++ if ((result = vsir_program_validate(program, config_flags, ++ compile_info->source_name, message_context)) < 0) ++ return result; ++ ++ return result; ++} +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 5c87ff15503..c4e712b8471 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -45,6 +45,8 @@ static spv_target_env spv_target_env_from_vkd3d(enum vkd3d_shader_spirv_environm + return SPV_ENV_OPENGL_4_5; + case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: + return SPV_ENV_VULKAN_1_0; ++ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: ++ return SPV_ENV_VULKAN_1_1; + default: + ERR("Invalid environment %#x.\n", environment); + return 
SPV_ENV_VULKAN_1_0; +@@ -223,7 +225,8 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d + } + } + +-#define VKD3D_SPIRV_VERSION 0x00010000 ++#define VKD3D_SPIRV_VERSION_1_0 0x00010000 ++#define VKD3D_SPIRV_VERSION_1_3 0x00010300 + #define VKD3D_SPIRV_GENERATOR_ID 18 + #define VKD3D_SPIRV_GENERATOR_VERSION 11 + #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) +@@ -1524,6 +1527,19 @@ static uint32_t vkd3d_spirv_build_op_logical_equal(struct vkd3d_spirv_builder *b + SpvOpLogicalEqual, result_type, operand0, operand1); + } + ++static uint32_t vkd3d_spirv_build_op_logical_or(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t operand0, uint32_t operand1) ++{ ++ return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, ++ SpvOpLogicalOr, result_type, operand0, operand1); ++} ++ ++static uint32_t vkd3d_spirv_build_op_logical_not(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t operand) ++{ ++ return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpLogicalNot, result_type, operand); ++} ++ + static uint32_t vkd3d_spirv_build_op_convert_utof(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t unsigned_value) + { +@@ -1825,6 +1841,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder + { + switch (data_type) + { ++ case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ + case VKD3D_DATA_FLOAT: + case VKD3D_DATA_SNORM: + case VKD3D_DATA_UNORM: +@@ -1832,6 +1849,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder + break; + case VKD3D_DATA_INT: + case VKD3D_DATA_UINT: ++ case VKD3D_DATA_UINT16: /* Minimum precision. 
TODO: native 16-bit */ + return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); + break; + case VKD3D_DATA_DOUBLE: +@@ -1900,7 +1918,7 @@ static void vkd3d_spirv_builder_free(struct vkd3d_spirv_builder *builder) + } + + static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, +- struct vkd3d_shader_code *spirv, const char *entry_point) ++ struct vkd3d_shader_code *spirv, const char *entry_point, enum vkd3d_shader_spirv_environment environment) + { + uint64_t capability_mask = builder->capability_mask; + struct vkd3d_spirv_stream stream; +@@ -1911,7 +1929,8 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, + vkd3d_spirv_stream_init(&stream); + + vkd3d_spirv_build_word(&stream, SpvMagicNumber); +- vkd3d_spirv_build_word(&stream, VKD3D_SPIRV_VERSION); ++ vkd3d_spirv_build_word(&stream, (environment == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1) ++ ? VKD3D_SPIRV_VERSION_1_3 : VKD3D_SPIRV_VERSION_1_0); + vkd3d_spirv_build_word(&stream, VKD3D_SPIRV_GENERATOR_MAGIC); + vkd3d_spirv_build_word(&stream, builder->current_id); /* bound */ + vkd3d_spirv_build_word(&stream, 0); /* schema, reserved */ +@@ -1940,6 +1959,9 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, + || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStorageImageArrayDynamicIndexing) + || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderNonUniformEXT)) + vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_descriptor_indexing"); ++ if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityFragmentShaderPixelInterlockEXT) ++ || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityFragmentShaderSampleInterlockEXT)) ++ vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_fragment_shader_interlock"); + if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStencilExportEXT)) + vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_shader_stencil_export"); + if 
(vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderViewportIndexLayerEXT)) +@@ -2346,6 +2368,7 @@ struct spirv_compiler + unsigned int output_control_point_count; + + bool use_vocp; ++ bool use_invocation_interlock; + bool emit_point_size; + + enum vkd3d_shader_opcode phase; +@@ -2427,14 +2450,14 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) + vkd3d_free(compiler); + } + +-static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, +- struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, ++static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *program, ++ const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location, + uint64_t config_flags) + { +- const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; +- const struct shader_signature *output_signature = &shader_desc->output_signature; ++ const struct shader_signature *patch_constant_signature = &program->patch_constant_signature; ++ const struct shader_signature *output_signature = &program->output_signature; + const struct vkd3d_shader_interface_info *shader_interface; + const struct vkd3d_shader_descriptor_offset_info *offset_info; + const struct vkd3d_shader_spirv_target_info *target_info; +@@ -2456,6 +2479,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve + { + case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: + case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: ++ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: + break; + default: + WARN("Invalid target environment %#x.\n", target_info->environment); +@@ -2545,7 +2569,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve + + 
rb_init(&compiler->symbol_table, vkd3d_symbol_compare); + +- compiler->shader_type = shader_version->type; ++ compiler->shader_type = program->shader_version.type; + + if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) + { +@@ -3372,7 +3396,7 @@ struct vkd3d_shader_register_info + bool is_aggregate; + }; + +-static bool spirv_compiler_get_register_info(const struct spirv_compiler *compiler, ++static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_info *register_info) + { + struct vkd3d_symbol reg_symbol, *symbol; +@@ -3398,7 +3422,8 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil + vkd3d_symbol_make_register(®_symbol, reg); + if (!(entry = rb_get(&compiler->symbol_table, ®_symbol))) + { +- FIXME("Unrecognized register (%s).\n", debug_vkd3d_symbol(®_symbol)); ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE, ++ "Unrecognized register (%s).\n", debug_vkd3d_symbol(®_symbol)); + memset(register_info, 0, sizeof(*register_info)); + return false; + } +@@ -3736,6 +3761,70 @@ static uint32_t spirv_compiler_emit_bool_to_double(struct spirv_compiler *compil + return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); + } + ++/* Based on the implementation in the OpenGL Mathematics library. */ ++static uint32_t half_to_float(uint16_t value) ++{ ++ uint32_t s = (value & 0x8000u) << 16; ++ uint32_t e = (value >> 10) & 0x1fu; ++ uint32_t m = value & 0x3ffu; ++ ++ if (!e) ++ { ++ if (!m) ++ { ++ /* Plus or minus zero */ ++ return s; ++ } ++ else ++ { ++ /* Denormalized number -- renormalize it */ ++ ++ while (!(m & 0x400u)) ++ { ++ m <<= 1; ++ --e; ++ } ++ ++ ++e; ++ m &= ~0x400u; ++ } ++ } ++ else if (e == 31u) ++ { ++ /* Positive or negative infinity for zero 'm'. 
++ * Nan for non-zero 'm' -- preserve sign and significand bits */ ++ return s | 0x7f800000u | (m << 13); ++ } ++ ++ /* Normalized number */ ++ e += 127u - 15u; ++ m <<= 13; ++ ++ /* Assemble s, e and m. */ ++ return s | (e << 23) | m; ++} ++ ++static uint32_t convert_raw_constant32(enum vkd3d_data_type data_type, unsigned int uint_value) ++{ ++ int16_t i; ++ ++ /* TODO: native 16-bit support. */ ++ if (data_type != VKD3D_DATA_UINT16 && data_type != VKD3D_DATA_HALF) ++ return uint_value; ++ ++ if (data_type == VKD3D_DATA_HALF) ++ return half_to_float(uint_value); ++ ++ /* Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or ++ * should not be sign-extended when 16-bit execution is not supported. The AMD RX 580 Windows ++ * driver has no 16-bit support, and sign-extends all 16-bit constant ints to 32 bits. These ++ * results differ from SM 5. The RX 6750 XT supports 16-bit execution, so constants are not ++ * extended, and results match SM 5. It seems best to replicate the sign-extension, and if ++ * execution is 16-bit, the values will be truncated. 
*/ ++ i = uint_value; ++ return (int32_t)i; ++} ++ + static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) + { +@@ -3748,14 +3837,15 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile + if (reg->dimension == VSIR_DIMENSION_SCALAR) + { + for (i = 0; i < component_count; ++i) +- values[i] = *reg->u.immconst_u32; ++ values[i] = convert_raw_constant32(reg->data_type, reg->u.immconst_u32[0]); + } + else + { + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) +- values[j++] = reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]; ++ values[j++] = convert_raw_constant32(reg->data_type, ++ reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]); + } + } + +@@ -3899,6 +3989,13 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil + + switch (icb->data_type) + { ++ case VKD3D_DATA_HALF: ++ case VKD3D_DATA_UINT16: ++ /* Scalar only. 
*/ ++ for (i = 0; i < element_count; ++i) ++ elements[i] = vkd3d_spirv_get_op_constant(builder, elem_type_id, ++ convert_raw_constant32(icb->data_type, icb->data[i])); ++ break; + case VKD3D_DATA_FLOAT: + case VKD3D_DATA_INT: + case VKD3D_DATA_UINT: +@@ -3998,7 +4095,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, + struct vkd3d_shader_register_info reg_info; + unsigned int component_count; + uint32_t type_id, val_id; +- uint32_t write_mask32; ++ uint32_t val_write_mask; + + if (reg->type == VKD3DSPR_IMMCONST) + return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); +@@ -4018,17 +4115,17 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + return vkd3d_spirv_get_op_undef(builder, type_id); + } +- assert(reg_info.component_type != VKD3D_SHADER_COMPONENT_DOUBLE); + spirv_compiler_emit_dereference_register(compiler, reg, ®_info); + +- write_mask32 = data_type_is_64_bit(reg->data_type) ? vsir_write_mask_32_from_64(write_mask) : write_mask; ++ val_write_mask = (data_type_is_64_bit(reg->data_type) && !component_type_is_64_bit(reg_info.component_type)) ++ ? vsir_write_mask_32_from_64(write_mask) : write_mask; + + /* Intermediate value (no storage class). */ + if (reg_info.storage_class == SpvStorageClassMax) + { + val_id = reg_info.id; + } +- else if (vsir_write_mask_component_count(write_mask32) == 1) ++ else if (vsir_write_mask_component_count(val_write_mask) == 1) + { + return spirv_compiler_emit_load_scalar(compiler, reg, swizzle, write_mask, ®_info); + } +@@ -4041,7 +4138,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, + + swizzle = data_type_is_64_bit(reg->data_type) ? 
vsir_swizzle_32_from_64(swizzle) : swizzle; + val_id = spirv_compiler_emit_swizzle(compiler, +- val_id, reg_info.write_mask, reg_info.component_type, swizzle, write_mask32); ++ val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask); + + if (component_type != reg_info.component_type) + { +@@ -4087,7 +4184,7 @@ static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, + uint32_t type_id; + + type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); +- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) ++ if (data_type_is_floating_point(reg->data_type)) + return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id); + + FIXME("Unhandled data type %#x.\n", reg->data_type); +@@ -4101,7 +4198,7 @@ static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, + uint32_t type_id; + + type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); +- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) ++ if (data_type_is_floating_point(reg->data_type)) + return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); + else if (data_type_is_integer(reg->data_type)) + return vkd3d_spirv_build_op_snegate(builder, type_id, val_id); +@@ -4285,7 +4382,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, + } + + type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); +- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) ++ if (data_type_is_floating_point(reg->data_type)) + return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id); + + FIXME("Unhandled data type %#x.\n", reg->data_type); +@@ -4322,11 +4419,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp + { + unsigned int component_count = vsir_write_mask_component_count(dst->write_mask); + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- 
uint32_t type_id, val_id; ++ uint32_t type_id, dst_type_id, val_id; + ++ type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + if (component_count > 1) + { +- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + val_id = vkd3d_spirv_build_op_composite_construct(builder, + type_id, component_ids, component_count); + } +@@ -4334,6 +4431,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp + { + val_id = *component_ids; + } ++ ++ dst_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); ++ if (dst_type_id != type_id) ++ val_id = vkd3d_spirv_build_op_bitcast(builder, dst_type_id, val_id); ++ + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } + +@@ -6272,9 +6374,24 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) + vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); + +- if (d->uav_flags & VKD3DSUF_GLOBALLY_COHERENT) ++ /* ROVs are implicitly globally coherent. */ ++ if (d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW)) + vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationCoherent, NULL, 0); + ++ if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) ++ { ++ if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, ++ "Rasteriser-ordered views are only supported in fragment shaders."); ++ else if (!spirv_compiler_is_target_extension_supported(compiler, ++ VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK)) ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, ++ "Cannot enable fragment shader interlock. 
" ++ "The target environment does not support fragment shader interlock."); ++ else ++ compiler->use_invocation_interlock = true; ++ } ++ + if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) + { + assert(structure_stride); /* counters are valid only for structured buffers */ +@@ -6324,20 +6441,26 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + } + + static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, +- const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) ++ const struct vkd3d_shader_register *reg, unsigned int alignment, unsigned int size, ++ unsigned int structure_stride, bool zero_init) + { +- uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id; ++ uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const SpvStorageClass storage_class = SpvStorageClassWorkgroup; + struct vkd3d_symbol reg_symbol; + ++ /* Alignment is supported only in the Kernel execution model. */ ++ if (alignment) ++ TRACE("Ignoring alignment %u.\n", alignment); ++ + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + length_id = spirv_compiler_get_constant_uint(compiler, size); + array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + + pointer_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, array_type_id); ++ init_id = zero_init ? 
vkd3d_spirv_get_op_constant_null(builder, array_type_id) : 0; + var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, +- pointer_type_id, storage_class, 0); ++ pointer_type_id, storage_class, init_id); + + spirv_compiler_emit_register_debug_name(builder, var_id, reg); + +@@ -6352,8 +6475,8 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { + const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; +- spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, +- tgsm_raw->byte_count / 4, 0); ++ spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, tgsm_raw->alignment, ++ tgsm_raw->byte_count / 4, 0, tgsm_raw->zero_init); + } + + static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compiler, +@@ -6361,8 +6484,8 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi + { + const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; + unsigned int stride = tgsm_structured->byte_stride / 4; +- spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, +- tgsm_structured->structure_count * stride, stride); ++ spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, tgsm_structured->alignment, ++ tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); + } + + static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, +@@ -6871,7 +6994,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, + assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); + + val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); +- if (dst->reg.data_type == VKD3D_DATA_FLOAT) ++ if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) + { + val_id = 
spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); + } +@@ -6880,7 +7003,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, + /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ + val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); + } +- else if (dst->reg.data_type == VKD3D_DATA_UINT) ++ else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) + { + val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); + } +@@ -6909,6 +7032,15 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil + SpvOp op = SpvOpMax; + unsigned int i; + ++ if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) ++ { ++ /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ ++ FIXME("Unsupported 64-bit source for bit count.\n"); ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, ++ "64-bit source for bit count is not supported."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ + if (src->reg.data_type == VKD3D_DATA_BOOL) + { + if (dst->reg.data_type == VKD3D_DATA_BOOL) +@@ -6997,6 +7129,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( + } + glsl_insts[] = + { ++ {VKD3DSIH_ABS, GLSLstd450FAbs}, + {VKD3DSIH_ACOS, GLSLstd450Acos}, + {VKD3DSIH_ASIN, GLSLstd450Asin}, + {VKD3DSIH_ATAN, GLSLstd450Atan}, +@@ -7049,6 +7182,16 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp + unsigned int i, component_count; + enum GLSLstd450 glsl_inst; + ++ if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI ++ || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) ++ { ++ /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ ++ FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, ++ "64-bit source for handler %#x is not supported.", instruction->handler_idx); ++ return; ++ } ++ + glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); + if (glsl_inst == GLSLstd450Bad) + { +@@ -7093,8 +7236,8 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, + struct vkd3d_shader_register_info dst_reg_info, src_reg_info; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; ++ unsigned int i, component_count, write_mask; + uint32_t components[VKD3D_VEC4_SIZE]; +- unsigned int i, component_count; + + if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA + || dst->modifiers || src->modifiers) +@@ -7145,7 +7288,13 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, + } + + general_implementation: +- val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); ++ write_mask = dst->write_mask; ++ if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) ++ write_mask = vsir_write_mask_64_from_32(write_mask); ++ else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) ++ write_mask = vsir_write_mask_32_from_64(write_mask); ++ ++ val_id = spirv_compiler_emit_load_src(compiler, src, write_mask); + if (dst->reg.data_type != src->reg.data_type) + { + val_id = vkd3d_spirv_build_op_bitcast(builder, vkd3d_spirv_get_type_id_for_data_type(builder, +@@ -7171,8 +7320,15 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, + type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); + + if (src[0].reg.data_type != VKD3D_DATA_BOOL) +- condition_id = spirv_compiler_emit_int_to_bool(compiler, +- VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, 
component_count, condition_id); ++ { ++ if (instruction->handler_idx == VKD3DSIH_CMP) ++ condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, ++ vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, ++ spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); ++ else ++ condition_id = spirv_compiler_emit_int_to_bool(compiler, ++ VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); ++ } + val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, src1_id, src2_id); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +@@ -7335,7 +7491,7 @@ static void spirv_compiler_emit_imad(struct spirv_compiler *compiler, + unsigned int i, component_count; + + component_count = vsir_write_mask_component_count(dst->write_mask); +- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, component_count); ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); + + for (i = 0; i < ARRAY_SIZE(src_ids); ++i) + src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], dst->write_mask); +@@ -7684,6 +7840,56 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co + spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); + } + ++static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, src0_id, src1_id, val_id; ++ ++ type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); ++ src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); ++ src1_id = spirv_compiler_emit_load_src(compiler, &src[1], 
dst->write_mask); ++ /* OpOrdered and OpUnordered are only available in Kernel mode. */ ++ src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); ++ src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); ++ val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); ++ if (instruction->handler_idx == VKD3DSIH_ORD) ++ val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t src0_id, src1_id, type_id, result_id; ++ unsigned int component_count; ++ SpvOp op; ++ ++ switch (instruction->handler_idx) ++ { ++ case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; ++ case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ component_count = vsir_write_mask_component_count(dst->write_mask); ++ ++ src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); ++ src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); ++ ++ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); ++ result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, src0_id, src1_id); ++ ++ result_id = spirv_compiler_emit_bool_to_float(compiler, component_count, result_id, false); ++ spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); ++} ++ + static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction, uint32_t target_block_id) + { +@@ -7702,11 +7908,31 @@ static uint32_t 
spirv_compiler_emit_conditional_branch(struct spirv_compiler *co + return merge_block_id; + } + ++static void spirv_compiler_end_invocation_interlock(struct spirv_compiler *compiler) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ ++ if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilitySampleRateShading)) ++ { ++ spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeSampleInterlockOrderedEXT, NULL, 0); ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityFragmentShaderSampleInterlockEXT); ++ } ++ else ++ { ++ spirv_compiler_emit_execution_mode(compiler, SpvExecutionModePixelInterlockOrderedEXT, NULL, 0); ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityFragmentShaderPixelInterlockEXT); ++ } ++ vkd3d_spirv_build_op(&builder->function_stream, SpvOpEndInvocationInterlockEXT); ++} ++ + static void spirv_compiler_emit_return(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + ++ if (compiler->use_invocation_interlock) ++ spirv_compiler_end_invocation_interlock(compiler); ++ + if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (is_in_default_phase(compiler) + || is_in_control_point_phase(compiler))) + spirv_compiler_emit_shader_epilogue_invocation(compiler); +@@ -7790,8 +8016,9 @@ static void spirv_compiler_emit_discard(struct spirv_compiler *compiler, + * a mismatch between the VSIR structure and the SPIR-V one, which would cause problems if + * structurisation is necessary. Therefore we emit it as a function call. 
*/ + condition_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); +- condition_id = spirv_compiler_emit_int_to_bool(compiler, +- instruction->flags, src->reg.data_type, 1, condition_id); ++ if (src->reg.data_type != VKD3D_DATA_BOOL) ++ condition_id = spirv_compiler_emit_int_to_bool(compiler, ++ instruction->flags, src->reg.data_type, 1, condition_id); + void_id = vkd3d_spirv_get_op_type_void(builder); + vkd3d_spirv_build_op_function_call(builder, void_id, spirv_compiler_get_discard_function_id(compiler), + &condition_id, 1); +@@ -8570,7 +8797,6 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, + ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); + constituents[j++] = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); + } +- assert(dst->reg.data_type == VKD3D_DATA_UINT); + spirv_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); + } + +@@ -8678,8 +8904,8 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t base_coordinate_id, component_idx; +- const struct vkd3d_shader_src_param *data; + struct vkd3d_shader_register_info reg_info; ++ struct vkd3d_shader_src_param data; + unsigned int component_count; + + if (!spirv_compiler_get_register_info(compiler, &dst->reg, ®_info)) +@@ -8691,9 +8917,9 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, + base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, + type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + +- data = &src[instruction->src_count - 1]; +- assert(data->reg.data_type == VKD3D_DATA_UINT); +- val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); ++ data = src[instruction->src_count - 
1]; ++ data.reg.data_type = VKD3D_DATA_UINT; ++ val_id = spirv_compiler_emit_load_src(compiler, &data, dst->write_mask); + + component_count = vsir_write_mask_component_count(dst->write_mask); + for (component_idx = 0; component_idx < component_count; ++component_idx) +@@ -8944,6 +9170,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil + const struct vkd3d_shader_dst_param *resource; + uint32_t coordinate_id, sample_id, pointer_id; + struct vkd3d_shader_register_info reg_info; ++ SpvMemorySemanticsMask memory_semantic; + struct vkd3d_shader_image image; + unsigned int structure_stride; + uint32_t coordinate_mask; +@@ -9035,12 +9262,23 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil + + val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type); + ++ if (instruction->flags & VKD3DARF_VOLATILE) ++ { ++ WARN("Ignoring 'volatile' attribute.\n"); ++ spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG, ++ "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->handler_idx); ++ } ++ ++ memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) ++ ? 
SpvMemorySemanticsSequentiallyConsistentMask ++ : SpvMemorySemanticsMaskNone; ++ + operands[i++] = pointer_id; + operands[i++] = spirv_compiler_get_constant_uint(compiler, scope); +- operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); ++ operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); + if (instruction->src_count >= 3) + { +- operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); ++ operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); + operands[i++] = spirv_compiler_emit_load_src_with_type(compiler, &src[2], VKD3DSP_WRITEMASK_0, component_type); + } + operands[i++] = val_id; +@@ -9110,6 +9348,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, lod_id, val_id, miplevel_count_id; ++ enum vkd3d_shader_component_type component_type; + uint32_t constituents[VKD3D_VEC4_SIZE]; + unsigned int i, size_component_count; + struct vkd3d_shader_image image; +@@ -9146,10 +9385,16 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, + val_id = vkd3d_spirv_build_op_composite_construct(builder, + type_id, constituents, i + 2); + ++ component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + if (instruction->flags == VKD3DSI_RESINFO_UINT) + { +- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); ++ /* SSA registers must match the specified result type. 
*/ ++ if (!register_is_ssa(&dst->reg)) ++ val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); ++ else ++ component_type = VKD3D_SHADER_COMPONENT_UINT; + } + else + { +@@ -9158,7 +9403,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, + val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); + } + val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, +- VKD3D_SHADER_COMPONENT_FLOAT, src[1].swizzle, dst->write_mask); ++ component_type, src[1].swizzle, dst->write_mask); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } +@@ -9475,6 +9720,11 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) + + if (compiler->emit_point_size) + spirv_compiler_emit_point_size(compiler); ++ ++ /* Maybe in the future we can try to shrink the size of the interlocked ++ * section. */ ++ if (compiler->use_invocation_interlock) ++ vkd3d_spirv_build_op(&compiler->spirv_builder.function_stream, SpvOpBeginInvocationInterlockEXT); + } + + static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9549,6 +9799,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + break; + case VKD3DSIH_DMOVC: + case VKD3DSIH_MOVC: ++ case VKD3DSIH_CMP: + spirv_compiler_emit_movc(compiler, instruction); + break; + case VKD3DSIH_SWAPC: +@@ -9587,6 +9838,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_ISFINITE: + spirv_compiler_emit_isfinite(compiler, instruction); + break; ++ case VKD3DSIH_ABS: + case VKD3DSIH_ACOS: + case VKD3DSIH_ASIN: + case VKD3DSIH_ATAN: +@@ -9669,6 +9921,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_ULT: + spirv_compiler_emit_comparison_instruction(compiler, instruction); + break; ++ case VKD3DSIH_ORD: ++ case VKD3DSIH_UNO: ++ spirv_compiler_emit_orderedness_instruction(compiler, instruction); ++ break; ++ case 
VKD3DSIH_SLT: ++ case VKD3DSIH_SGE: ++ spirv_compiler_emit_float_comparison_instruction(compiler, instruction); ++ break; + case VKD3DSIH_BFI: + case VKD3DSIH_IBFE: + case VKD3DSIH_UBFE: +@@ -9796,7 +10056,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + spirv_compiler_emit_cut_stream(compiler, instruction); + break; + case VKD3DSIH_DCL: +- case VKD3DSIH_DCL_CONSTANT_BUFFER: + case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: + case VKD3DSIH_DCL_INPUT_SGV: +@@ -9899,13 +10158,14 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; + struct vkd3d_shader_instruction_array instructions; + struct vsir_program *program = &parser->program; ++ enum vkd3d_shader_spirv_environment environment; + enum vkd3d_result result = VKD3D_OK; + unsigned int i; + +- if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) ++ if ((result = vsir_program_normalise(program, compiler->config_flags, ++ compile_info, compiler->message_context)) < 0) + return result; + + if (program->temp_count) +@@ -9924,12 +10184,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + instructions = program->instructions; + memset(&program->instructions, 0, sizeof(program->instructions)); + +- compiler->input_signature = shader_desc->input_signature; +- compiler->output_signature = shader_desc->output_signature; +- compiler->patch_constant_signature = shader_desc->patch_constant_signature; +- memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); +- memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); +- memset(&shader_desc->patch_constant_signature, 
0, sizeof(shader_desc->patch_constant_signature)); ++ compiler->input_signature = program->input_signature; ++ compiler->output_signature = program->output_signature; ++ compiler->patch_constant_signature = program->patch_constant_signature; ++ memset(&program->input_signature, 0, sizeof(program->input_signature)); ++ memset(&program->output_signature, 0, sizeof(program->output_signature)); ++ memset(&program->patch_constant_signature, 0, sizeof(program->patch_constant_signature)); + compiler->use_vocp = program->use_vocp; + compiler->block_names = program->block_names; + compiler->block_name_count = program->block_name_count; +@@ -9985,12 +10245,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + if (compiler->strip_debug) + vkd3d_spirv_stream_clear(&builder->debug_stream); + +- if (!vkd3d_spirv_compile_module(builder, spirv, spirv_compiler_get_entry_point_name(compiler))) ++ environment = spirv_compiler_get_target_environment(compiler); ++ if (!vkd3d_spirv_compile_module(builder, spirv, spirv_compiler_get_entry_point_name(compiler), environment)) + return VKD3D_ERROR; + + if (TRACE_ON() || parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) + { +- enum vkd3d_shader_spirv_environment environment = spirv_compiler_get_target_environment(compiler); + struct vkd3d_string_buffer buffer; + + if (TRACE_ON()) +@@ -10018,7 +10278,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT) + { + struct vkd3d_shader_code text; +- enum vkd3d_shader_spirv_environment environment = spirv_compiler_get_target_environment(compiler); + if (vkd3d_spirv_binary_to_text(spirv, environment, compiler->formatting, &text) != VKD3D_OK) + return VKD3D_ERROR; + vkd3d_shader_free_shader_code(spirv); +@@ -10036,8 +10295,8 @@ int spirv_compile(struct vkd3d_shader_parser *parser, + struct spirv_compiler *spirv_compiler; + int ret; + +- if (!(spirv_compiler = 
spirv_compiler_create(&parser->program.shader_version, &parser->shader_desc, +- compile_info, scan_descriptor_info, message_context, &parser->location, parser->config_flags))) ++ if (!(spirv_compiler = spirv_compiler_create(&parser->program, compile_info, ++ scan_descriptor_info, message_context, &parser->location, parser->config_flags))) + { + ERR("Failed to create SPIR-V compiler.\n"); + return VKD3D_ERROR; +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 3be4e40ab0c..6ee06c02d74 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -954,32 +954,32 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins + case VKD3DSPR_INCONTROLPOINT: + io_masks = priv->input_register_masks; + ranges = &priv->input_index_ranges; +- signature = &priv->p.shader_desc.input_signature; ++ signature = &priv->p.program.input_signature; + break; + case VKD3DSPR_OUTPUT: + if (sm4_parser_is_in_fork_or_join_phase(priv)) + { + io_masks = priv->patch_constant_register_masks; + ranges = &priv->patch_constant_index_ranges; +- signature = &priv->p.shader_desc.patch_constant_signature; ++ signature = &priv->p.program.patch_constant_signature; + } + else + { + io_masks = priv->output_register_masks; + ranges = &priv->output_index_ranges; +- signature = &priv->p.shader_desc.output_signature; ++ signature = &priv->p.program.output_signature; + } + break; + case VKD3DSPR_COLOROUT: + case VKD3DSPR_OUTCONTROLPOINT: + io_masks = priv->output_register_masks; + ranges = &priv->output_index_ranges; +- signature = &priv->p.shader_desc.output_signature; ++ signature = &priv->p.program.output_signature; + break; + case VKD3DSPR_PATCHCONST: + io_masks = priv->patch_constant_register_masks; + ranges = &priv->patch_constant_index_ranges; +- signature = &priv->p.shader_desc.patch_constant_signature; ++ signature = &priv->p.program.patch_constant_signature; + break; + + default: +@@ -1113,7 +1113,7 @@ 
static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u + if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) + { + struct signature_element *e = vsir_signature_find_element_for_reg( +- &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); ++ &priv->p.program.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + + e->interpolation_mode = ins->flags; + } +@@ -1128,7 +1128,7 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in + if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) + { + struct signature_element *e = vsir_signature_find_element_for_reg( +- &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); ++ &priv->p.program.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + + e->interpolation_mode = ins->flags; + } +@@ -1263,6 +1263,7 @@ static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, u + ins->declaration.tgsm_raw.byte_count = *tokens; + if (ins->declaration.tgsm_raw.byte_count % 4) + FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); ++ ins->declaration.tgsm_raw.zero_init = false; + } + + static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1274,6 +1275,7 @@ static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction + ins->declaration.tgsm_structured.structure_count = *tokens; + if (ins->declaration.tgsm_structured.byte_stride % 4) + FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); ++ ins->declaration.tgsm_structured.zero_init = false; + } + + static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1748,7 +1750,6 @@ static void 
shader_sm4_destroy(struct vkd3d_shader_parser *parser) + struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); + + vsir_program_cleanup(&parser->program); +- free_shader_desc(&parser->shader_desc); + vkd3d_free(sm4); + } + +@@ -2504,7 +2505,7 @@ static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = + }; + + static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, +- size_t byte_code_size, const char *source_name, const struct shader_signature *output_signature, ++ size_t byte_code_size, const char *source_name, + struct vkd3d_shader_message_context *message_context) + { + struct vkd3d_shader_version version; +@@ -2648,9 +2649,9 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) + { + struct vkd3d_shader_instruction_array *instructions; +- struct vkd3d_shader_desc *shader_desc; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_sm4_parser *sm4; ++ struct dxbc_shader_desc dxbc_desc = {0}; + int ret; + + if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) +@@ -2659,36 +2660,40 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + return VKD3D_ERROR_OUT_OF_MEMORY; + } + +- shader_desc = &sm4->p.shader_desc; +- shader_desc->is_dxil = false; ++ dxbc_desc.is_dxil = false; + if ((ret = shader_extract_from_dxbc(&compile_info->source, +- message_context, compile_info->source_name, shader_desc)) < 0) ++ message_context, compile_info->source_name, &dxbc_desc)) < 0) + { + WARN("Failed to extract shader, vkd3d result %d.\n", ret); + vkd3d_free(sm4); + return ret; + } + +- if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, +- compile_info->source_name, &shader_desc->output_signature, message_context)) ++ if (!shader_sm4_init(sm4, dxbc_desc.byte_code, dxbc_desc.byte_code_size, ++ compile_info->source_name, message_context)) + { + 
WARN("Failed to initialise shader parser.\n"); +- free_shader_desc(shader_desc); ++ free_dxbc_shader_desc(&dxbc_desc); + vkd3d_free(sm4); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + ++ sm4->p.program.input_signature = dxbc_desc.input_signature; ++ sm4->p.program.output_signature = dxbc_desc.output_signature; ++ sm4->p.program.patch_constant_signature = dxbc_desc.patch_constant_signature; ++ memset(&dxbc_desc, 0, sizeof(dxbc_desc)); ++ + /* DXBC stores used masks inverted for output signatures, for some reason. + * We return them un-inverted. */ +- uninvert_used_masks(&shader_desc->output_signature); ++ uninvert_used_masks(&sm4->p.program.output_signature); + if (sm4->p.program.shader_version.type == VKD3D_SHADER_TYPE_HULL) +- uninvert_used_masks(&shader_desc->patch_constant_signature); ++ uninvert_used_masks(&sm4->p.program.patch_constant_signature); + +- if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature, ++ if (!shader_sm4_parser_validate_signature(sm4, &sm4->p.program.input_signature, + sm4->input_register_masks, "Input") +- || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature, ++ || !shader_sm4_parser_validate_signature(sm4, &sm4->p.program.output_signature, + sm4->output_register_masks, "Output") +- || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature, ++ || !shader_sm4_parser_validate_signature(sm4, &sm4->p.program.patch_constant_signature, + sm4->patch_constant_register_masks, "Patch constant")) + { + shader_sm4_destroy(&sm4->p); +@@ -2721,7 +2726,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + shader_sm4_validate_default_phase_index_ranges(sm4); + + if (!sm4->p.failed) +- vsir_validate(&sm4->p); ++ vkd3d_shader_parser_validate(&sm4->p); + + if (sm4->p.failed) + { +@@ -2989,26 +2994,28 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) + { + switch (type->class) + { +- case HLSL_CLASS_ARRAY: +- return 
sm4_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3D_SVC_MATRIX_COLUMNS; + else + return D3D_SVC_MATRIX_ROWS; +- case HLSL_CLASS_OBJECT: +- return D3D_SVC_OBJECT; + case HLSL_CLASS_SCALAR: + return D3D_SVC_SCALAR; +- case HLSL_CLASS_STRUCT: +- return D3D_SVC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3D_SVC_VECTOR; +- default: +- ERR("Invalid class %#x.\n", type->class); +- vkd3d_unreachable(); ++ ++ case HLSL_CLASS_ARRAY: ++ case HLSL_CLASS_STRUCT: ++ case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VOID: ++ break; + } ++ vkd3d_unreachable(); + } + + static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) +@@ -3024,68 +3031,8 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) + return D3D_SVT_FLOAT; + case HLSL_TYPE_INT: + return D3D_SVT_INT; +- case HLSL_TYPE_PIXELSHADER: +- return D3D_SVT_PIXELSHADER; +- case HLSL_TYPE_SAMPLER: +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return D3D_SVT_SAMPLER1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3D_SVT_SAMPLER2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3D_SVT_SAMPLER3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return D3D_SVT_SAMPLERCUBE; +- case HLSL_SAMPLER_DIM_GENERIC: +- return D3D_SVT_SAMPLER; +- default: +- vkd3d_unreachable(); +- } +- break; +- case HLSL_TYPE_STRING: +- return D3D_SVT_STRING; +- case HLSL_TYPE_TEXTURE: +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return D3D_SVT_TEXTURE1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3D_SVT_TEXTURE2D; +- case HLSL_SAMPLER_DIM_2DMS: +- return D3D_SVT_TEXTURE2DMS; +- case HLSL_SAMPLER_DIM_3D: +- return D3D_SVT_TEXTURE3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return D3D_SVT_TEXTURECUBE; +- case HLSL_SAMPLER_DIM_GENERIC: +- return D3D_SVT_TEXTURE; +- default: +- 
vkd3d_unreachable(); +- } +- break; + case HLSL_TYPE_UINT: + return D3D_SVT_UINT; +- case HLSL_TYPE_VERTEXSHADER: +- return D3D_SVT_VERTEXSHADER; +- case HLSL_TYPE_VOID: +- return D3D_SVT_VOID; +- case HLSL_TYPE_UAV: +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return D3D_SVT_RWTEXTURE1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3D_SVT_RWTEXTURE2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3D_SVT_RWTEXTURE3D; +- case HLSL_SAMPLER_DIM_1DARRAY: +- return D3D_SVT_RWTEXTURE1DARRAY; +- case HLSL_SAMPLER_DIM_2DARRAY: +- return D3D_SVT_RWTEXTURE2DARRAY; +- default: +- vkd3d_unreachable(); +- } + default: + vkd3d_unreachable(); + } +@@ -3096,8 +3043,8 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); + const char *name = array_type->name ? array_type->name : ""; + const struct hlsl_profile_info *profile = ctx->profile; +- unsigned int field_count = 0, array_size = 0; +- size_t fields_offset = 0, name_offset = 0; ++ unsigned int array_size = 0; ++ size_t name_offset = 0; + size_t i; + + if (type->bytecode_offset) +@@ -3111,32 +3058,47 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + + if (array_type->class == HLSL_CLASS_STRUCT) + { +- field_count = array_type->e.record.field_count; ++ unsigned int field_count = 0; ++ size_t fields_offset = 0; + +- for (i = 0; i < field_count; ++i) ++ for (i = 0; i < array_type->e.record.field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + ++ if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) ++ continue; ++ + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm4_type(ctx, buffer, field->type); ++ ++field_count; + } + + fields_offset = bytecode_align(buffer); + +- for (i = 0; i < field_count; ++i) ++ for (i = 0; i < array_type->e.record.field_count; ++i) + { + struct hlsl_struct_field *field = 
&array_type->e.record.fields[i]; + ++ if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) ++ continue; ++ + put_u32(buffer, field->name_bytecode_offset); + put_u32(buffer, field->type->bytecode_offset); + put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); + } ++ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); ++ put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); ++ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); ++ put_u32(buffer, fields_offset); ++ } ++ else ++ { ++ assert(array_type->class <= HLSL_CLASS_LAST_NUMERIC); ++ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); ++ put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx)); ++ put_u32(buffer, vkd3d_make_u32(array_size, 0)); ++ put_u32(buffer, 1); + } +- +- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); +- put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); +- put_u32(buffer, vkd3d_make_u32(array_size, field_count)); +- put_u32(buffer, fields_offset); + + if (profile->major_version >= 5) + { +@@ -3150,20 +3112,21 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + + static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) + { +- if (type->class == HLSL_CLASS_ARRAY) +- return sm4_resource_type(type->e.array.type); +- +- switch (type->base_type) ++ switch (type->class) + { +- case HLSL_TYPE_SAMPLER: ++ case HLSL_CLASS_ARRAY: ++ return sm4_resource_type(type->e.array.type); ++ case HLSL_CLASS_SAMPLER: + return D3D_SIT_SAMPLER; +- case HLSL_TYPE_TEXTURE: ++ case HLSL_CLASS_TEXTURE: + return D3D_SIT_TEXTURE; +- case HLSL_TYPE_UAV: ++ case HLSL_CLASS_UAV: + return D3D_SIT_UAV_RWTYPED; + default: +- vkd3d_unreachable(); ++ break; + } ++ ++ vkd3d_unreachable(); + } + + static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) +@@ -3328,7 +3291,7 
@@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + + extern_resources[*count].name = name; + extern_resources[*count].data_type = component_type; +- extern_resources[*count].is_user_packed = false; ++ extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; + + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id + regset_offset; +@@ -3428,10 +3391,10 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + + if (profile->major_version >= 5) + { +- put_u32(&buffer, TAG_RD11); ++ put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); + put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ + put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ +- put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ ++ put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t)); /* size of binding desc */ + put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ + put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ + put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ +@@ -3448,6 +3411,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + const struct extern_resource *resource = &extern_resources[i]; + uint32_t flags = 0; + ++ if (hlsl_version_ge(ctx, 5, 1)) ++ hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); ++ + if (resource->is_user_packed) + flags |= D3D_SIF_USERPACKED; + +@@ -3480,6 +3446,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + if (!cbuffer->reg.allocated) + continue; + ++ if (hlsl_version_ge(ctx, 5, 1)) ++ hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); ++ + if (cbuffer->reservation.reg_type) + flags |= D3D_SIF_USERPACKED; + +@@ -3523,8 +3492,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct 
dxbc_writer *dxbc) + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (var->is_uniform && var->buffer == cbuffer +- && var->data_type->class != HLSL_CLASS_OBJECT) ++ if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) + ++var_count; + } + +@@ -3558,8 +3526,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (var->is_uniform && var->buffer == cbuffer +- && var->data_type->class != HLSL_CLASS_OBJECT) ++ if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) + { + uint32_t flags = 0; + +@@ -3586,8 +3553,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + j = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (var->is_uniform && var->buffer == cbuffer +- && var->data_type->class != HLSL_CLASS_OBJECT) ++ if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) + { + const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); + size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); +@@ -4598,7 +4564,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node + enum hlsl_sampler_dim dim) + { + const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); +- bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE ++ bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); + unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; +@@ -5389,7 +5355,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + &expr->node, arg1, arg2); + break; + +- case HLSL_OP3_MOVC: ++ case HLSL_OP3_TERNARY: + write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); + break; + +@@ -5445,7 +5411,8 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju + + case HLSL_IR_JUMP_DISCARD_NZ: + { +- instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; ++ instr.opcode = VKD3D_SM4_OP_DISCARD; ++ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; + + memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); + instr.src_count = 1; +@@ -5746,18 +5713,12 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc + { + if (instr->data_type) + { +- if (instr->data_type->class == HLSL_CLASS_MATRIX) ++ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + { +- hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); ++ hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.", ++ instr->data_type->class); + break; + } +- else if (instr->data_type->class == HLSL_CLASS_OBJECT) +- { +- hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); +- break; +- } +- +- 
assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); + + if (!instr->reg.allocated) + { +@@ -5854,13 +5815,21 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) ++ { ++ if (hlsl_version_ge(ctx, 5, 1)) ++ hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); ++ + write_sm4_dcl_constant_buffer(&tpf, cbuffer); ++ } + } + + for (i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + ++ if (hlsl_version_ge(ctx, 5, 1)) ++ hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); ++ + if (resource->regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&tpf, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index 4f400d19f6f..cb37efb53f7 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -23,6 +23,8 @@ + #include + #include + ++/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ ++ + static inline int char_to_int(char c) + { + if ('0' <= c && c <= '9') +@@ -71,8 +73,16 @@ void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer) + + void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer) + { +- buffer->buffer[0] = '\0'; +- buffer->content_size = 0; ++ vkd3d_string_buffer_truncate(buffer, 0); ++} ++ ++void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size) ++{ ++ if (size < buffer->content_size) ++ { ++ buffer->buffer[size] = '\0'; ++ buffer->content_size = size; ++ } + } + + static bool vkd3d_string_buffer_resize(struct vkd3d_string_buffer *buffer, int rc) +@@ -224,6 +234,16 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *cache, struct + 
cache->buffers[cache->count++] = buffer; + } + ++void vkd3d_shader_code_from_string_buffer(struct vkd3d_shader_code *code, struct vkd3d_string_buffer *buffer) ++{ ++ code->code = buffer->buffer; ++ code->size = buffer->content_size; ++ ++ buffer->buffer = NULL; ++ buffer->buffer_size = 0; ++ buffer->content_size = 0; ++} ++ + void vkd3d_shader_message_context_init(struct vkd3d_shader_message_context *context, + enum vkd3d_shader_log_level log_level) + { +@@ -1438,11 +1458,11 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + + if (!ret && signature_info) + { +- if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) ++ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->program.input_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, +- &parser->shader_desc.output_signature) ++ &parser->program.output_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, +- &parser->shader_desc.patch_constant_signature)) ++ &parser->program.patch_constant_signature)) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + } +@@ -1470,60 +1490,6 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + return ret; + } + +-static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context) +-{ +- struct vkd3d_shader_parser *parser; +- int ret; +- +- if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) +- { +- WARN("Failed to initialise shader parser.\n"); +- return ret; +- } +- +- ret = scan_with_parser(compile_info, message_context, NULL, parser); +- vkd3d_shader_parser_destroy(parser); +- +- return ret; +-} +- +-static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context) +-{ +- struct vkd3d_shader_parser 
*parser; +- int ret; +- +- if ((ret = vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) +- { +- WARN("Failed to initialise shader parser.\n"); +- return ret; +- } +- +- ret = scan_with_parser(compile_info, message_context, NULL, parser); +- vkd3d_shader_parser_destroy(parser); +- +- return ret; +-} +- +-static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context) +-{ +- struct vkd3d_shader_parser *parser; +- int ret; +- +- if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) +- { +- WARN("Failed to initialise shader parser.\n"); +- return ret; +- } +- +- ret = scan_with_parser(compile_info, message_context, NULL, parser); +- vkd3d_shader_parser_destroy(parser); +- +- return ret; +-} +- + int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) + { + struct vkd3d_shader_message_context message_context; +@@ -1543,29 +1509,44 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + + vkd3d_shader_dump_shader(compile_info); + +- switch (compile_info->source_type) ++ if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) + { +- case VKD3D_SHADER_SOURCE_DXBC_TPF: +- ret = scan_dxbc(compile_info, &message_context); +- break; ++ FIXME("HLSL support not implemented.\n"); ++ ret = VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ else ++ { ++ struct vkd3d_shader_parser *parser; + +- case VKD3D_SHADER_SOURCE_HLSL: +- FIXME("HLSL support not implemented.\n"); +- ret = VKD3D_ERROR_NOT_IMPLEMENTED; +- break; ++ switch (compile_info->source_type) ++ { ++ case VKD3D_SHADER_SOURCE_D3D_BYTECODE: ++ ret = vkd3d_shader_sm1_parser_create(compile_info, &message_context, &parser); ++ break; + +- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: +- ret = scan_d3dbc(compile_info, &message_context); +- break; ++ case VKD3D_SHADER_SOURCE_DXBC_TPF: ++ ret = vkd3d_shader_sm4_parser_create(compile_info, 
&message_context, &parser); ++ break; + +- case VKD3D_SHADER_SOURCE_DXBC_DXIL: +- ret = scan_dxil(compile_info, &message_context); +- break; ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: ++ ret = vkd3d_shader_sm6_parser_create(compile_info, &message_context, &parser); ++ break; + +- default: +- ERR("Unsupported source type %#x.\n", compile_info->source_type); +- ret = VKD3D_ERROR_INVALID_ARGUMENT; +- break; ++ default: ++ ERR("Unsupported source type %#x.\n", compile_info->source_type); ++ ret = VKD3D_ERROR_INVALID_ARGUMENT; ++ break; ++ } ++ ++ if (ret < 0) ++ { ++ WARN("Failed to create shader parser.\n"); ++ } ++ else ++ { ++ ret = scan_with_parser(compile_info, &message_context, NULL, parser); ++ vkd3d_shader_parser_destroy(parser); ++ } + } + + vkd3d_shader_message_context_trace_messages(&message_context); +@@ -1575,12 +1556,12 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + return ret; + } + +-static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, ++int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { + struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; +- struct vkd3d_glsl_generator *glsl_generator; ++ struct vsir_program *program = &parser->program; + struct vkd3d_shader_compile_info scan_info; + int ret; + +@@ -1589,22 +1570,13 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + switch (compile_info->target_type) + { + case VKD3D_SHADER_TARGET_D3D_ASM: +- ret = vkd3d_dxbc_binary_to_text(&parser->program, compile_info, out, VSIR_ASM_D3D); ++ ret = d3d_asm_compile(program, compile_info, out, VSIR_ASM_FLAG_NONE); + break; + + case VKD3D_SHADER_TARGET_GLSL: + if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) + return ret; +- if (!(glsl_generator = 
vkd3d_glsl_generator_create(&parser->program.shader_version, +- message_context, &parser->location))) +- { +- ERR("Failed to create GLSL generator.\n"); +- vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); +- return VKD3D_ERROR; +- } +- +- ret = vkd3d_glsl_generator_generate(glsl_generator, &parser->program, out); +- vkd3d_glsl_generator_destroy(glsl_generator); ++ ret = glsl_compile(program, parser->config_flags, compile_info, out, message_context); + vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + break; + +@@ -1624,24 +1596,6 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + return ret; + } + +-static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +-{ +- struct vkd3d_shader_parser *parser; +- int ret; +- +- if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) +- { +- WARN("Failed to initialise shader parser.\n"); +- return ret; +- } +- +- ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); +- +- vkd3d_shader_parser_destroy(parser); +- return ret; +-} +- + static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { +@@ -1657,42 +1611,6 @@ static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, + return ret; + } + +-static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +-{ +- struct vkd3d_shader_parser *parser; +- int ret; +- +- if ((ret = vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) +- { +- WARN("Failed to initialise shader parser.\n"); +- return ret; +- } +- +- ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); +- +- 
vkd3d_shader_parser_destroy(parser); +- return ret; +-} +- +-static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +-{ +- struct vkd3d_shader_parser *parser; +- int ret; +- +- if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) +- { +- WARN("Failed to initialise shader parser.\n"); +- return ret; +- } +- +- ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); +- +- vkd3d_shader_parser_destroy(parser); +- return ret; +-} +- + int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, char **messages) + { +@@ -1713,26 +1631,43 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + + vkd3d_shader_dump_shader(compile_info); + +- switch (compile_info->source_type) ++ if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) + { +- case VKD3D_SHADER_SOURCE_DXBC_TPF: +- ret = compile_dxbc_tpf(compile_info, out, &message_context); +- break; ++ ret = compile_hlsl(compile_info, out, &message_context); ++ } ++ else ++ { ++ struct vkd3d_shader_parser *parser; + +- case VKD3D_SHADER_SOURCE_HLSL: +- ret = compile_hlsl(compile_info, out, &message_context); +- break; ++ switch (compile_info->source_type) ++ { ++ case VKD3D_SHADER_SOURCE_D3D_BYTECODE: ++ ret = vkd3d_shader_sm1_parser_create(compile_info, &message_context, &parser); ++ break; + +- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: +- ret = compile_d3d_bytecode(compile_info, out, &message_context); +- break; ++ case VKD3D_SHADER_SOURCE_DXBC_TPF: ++ ret = vkd3d_shader_sm4_parser_create(compile_info, &message_context, &parser); ++ break; + +- case VKD3D_SHADER_SOURCE_DXBC_DXIL: +- ret = compile_dxbc_dxil(compile_info, out, &message_context); +- break; ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: ++ ret = vkd3d_shader_sm6_parser_create(compile_info, &message_context, &parser); ++ 
break; + +- default: +- vkd3d_unreachable(); ++ default: ++ ERR("Unsupported source type %#x.\n", compile_info->source_type); ++ ret = VKD3D_ERROR_INVALID_ARGUMENT; ++ break; ++ } ++ ++ if (ret < 0) ++ { ++ WARN("Failed to create shader parser.\n"); ++ } ++ else ++ { ++ ret = vkd3d_shader_parser_compile(parser, compile_info, out, &message_context); ++ vkd3d_shader_parser_destroy(parser); ++ } + } + + vkd3d_shader_message_context_trace_messages(&message_context); +@@ -1937,13 +1872,18 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + VKD3D_SHADER_TARGET_SPIRV_TEXT, + #endif + VKD3D_SHADER_TARGET_D3D_ASM, +-#if 0 ++#ifdef VKD3D_SHADER_UNSUPPORTED_GLSL + VKD3D_SHADER_TARGET_GLSL, + #endif + }; + + static const enum vkd3d_shader_target_type hlsl_types[] = + { ++ VKD3D_SHADER_TARGET_SPIRV_BINARY, ++#ifdef HAVE_SPIRV_TOOLS ++ VKD3D_SHADER_TARGET_SPIRV_TEXT, ++#endif ++ VKD3D_SHADER_TARGET_D3D_ASM, + VKD3D_SHADER_TARGET_D3D_BYTECODE, + VKD3D_SHADER_TARGET_DXBC_TPF, + VKD3D_SHADER_TARGET_FX, +@@ -1958,13 +1898,21 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + VKD3D_SHADER_TARGET_D3D_ASM, + }; + ++#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL ++ static const enum vkd3d_shader_target_type dxbc_dxil_types[] = ++ { ++ VKD3D_SHADER_TARGET_SPIRV_BINARY, ++# ifdef HAVE_SPIRV_TOOLS ++ VKD3D_SHADER_TARGET_SPIRV_TEXT, ++# endif ++ VKD3D_SHADER_TARGET_D3D_ASM, ++ }; ++#endif ++ + TRACE("source_type %#x, count %p.\n", source_type, count); + + switch (source_type) + { +-#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL +- case VKD3D_SHADER_SOURCE_DXBC_DXIL: +-#endif + case VKD3D_SHADER_SOURCE_DXBC_TPF: + *count = ARRAY_SIZE(dxbc_tpf_types); + return dxbc_tpf_types; +@@ -1977,6 +1925,12 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + *count = ARRAY_SIZE(d3dbc_types); + return d3dbc_types; + ++#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: ++ *count = 
ARRAY_SIZE(dxbc_dxil_types); ++ return dxbc_dxil_types; ++#endif ++ + default: + *count = 0; + return NULL; +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 2d3b3254638..1f4320968d3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -100,6 +100,7 @@ enum vkd3d_shader_error + + VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300, + VKD3D_SHADER_WARNING_SPV_INVALID_UAV_FLAGS = 2301, ++ VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG = 2302, + + VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, + VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, +@@ -148,6 +149,8 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, + VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, + VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, ++ VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, ++ VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, +@@ -218,6 +221,7 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX = 9015, + VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW = 9016, + VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION = 9018, + + VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, + }; +@@ -445,6 +449,7 @@ enum vkd3d_shader_opcode + VKD3DSIH_NOT, + VKD3DSIH_NRM, + VKD3DSIH_OR, ++ VKD3DSIH_ORD, + VKD3DSIH_PHASE, + VKD3DSIH_PHI, + VKD3DSIH_POW, +@@ -516,6 +521,7 @@ enum vkd3d_shader_opcode + VKD3DSIH_UMAX, + VKD3DSIH_UMIN, + VKD3DSIH_UMUL, ++ VKD3DSIH_UNO, + VKD3DSIH_USHR, + VKD3DSIH_UTOD, + VKD3DSIH_UTOF, +@@ -620,14 +626,16 @@ enum vkd3d_data_type + VKD3D_DATA_UINT8, + VKD3D_DATA_UINT64, + VKD3D_DATA_BOOL, ++ VKD3D_DATA_UINT16, ++ VKD3D_DATA_HALF, + + VKD3D_DATA_COUNT, + }; + + static inline bool data_type_is_integer(enum vkd3d_data_type 
data_type) + { +- return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT +- || data_type == VKD3D_DATA_UINT64; ++ return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT16 ++ || data_type == VKD3D_DATA_UINT || data_type == VKD3D_DATA_UINT64; + } + + static inline bool data_type_is_bool(enum vkd3d_data_type data_type) +@@ -635,6 +643,11 @@ static inline bool data_type_is_bool(enum vkd3d_data_type data_type) + return data_type == VKD3D_DATA_BOOL; + } + ++static inline bool data_type_is_floating_point(enum vkd3d_data_type data_type) ++{ ++ return data_type == VKD3D_DATA_HALF || data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; ++} ++ + static inline bool data_type_is_64_bit(enum vkd3d_data_type data_type) + { + return data_type == VKD3D_DATA_DOUBLE || data_type == VKD3D_DATA_UINT64; +@@ -749,11 +762,21 @@ enum vkd3d_shader_uav_flags + VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100, + }; + ++enum vkd3d_shader_atomic_rmw_flags ++{ ++ VKD3DARF_SEQ_CST = 0x1, ++ VKD3DARF_VOLATILE = 0x2, ++}; ++ + enum vkd3d_tessellator_domain + { ++ VKD3D_TESSELLATOR_DOMAIN_INVALID = 0, ++ + VKD3D_TESSELLATOR_DOMAIN_LINE = 1, + VKD3D_TESSELLATOR_DOMAIN_TRIANGLE = 2, + VKD3D_TESSELLATOR_DOMAIN_QUAD = 3, ++ ++ VKD3D_TESSELLATOR_DOMAIN_COUNT = 4, + }; + + #define VKD3DSI_NONE 0x0 +@@ -808,6 +831,8 @@ enum vkd3d_shader_type + VKD3D_SHADER_TYPE_COUNT, + }; + ++struct vkd3d_shader_message_context; ++ + struct vkd3d_shader_version + { + enum vkd3d_shader_type type; +@@ -1025,7 +1050,7 @@ struct signature_element *vsir_signature_find_element_for_reg(const struct shade + unsigned int reg_idx, unsigned int write_mask); + void shader_signature_cleanup(struct shader_signature *signature); + +-struct vkd3d_shader_desc ++struct dxbc_shader_desc + { + const uint32_t *byte_code; + size_t byte_code_size; +@@ -1033,7 +1058,10 @@ struct vkd3d_shader_desc + struct shader_signature input_signature; + struct 
shader_signature output_signature; + struct shader_signature patch_constant_signature; ++}; + ++struct vkd3d_shader_desc ++{ + struct + { + uint32_t used, external; +@@ -1079,14 +1107,18 @@ struct vkd3d_shader_tgsm + struct vkd3d_shader_tgsm_raw + { + struct vkd3d_shader_dst_param reg; ++ unsigned int alignment; + unsigned int byte_count; ++ bool zero_init; + }; + + struct vkd3d_shader_tgsm_structured + { + struct vkd3d_shader_dst_param reg; ++ unsigned int alignment; + unsigned int byte_stride; + unsigned int structure_count; ++ bool zero_init; + }; + + struct vkd3d_shader_thread_group_size +@@ -1290,6 +1322,10 @@ struct vsir_program + struct vkd3d_shader_version shader_version; + struct vkd3d_shader_instruction_array instructions; + ++ struct shader_signature input_signature; ++ struct shader_signature output_signature; ++ struct shader_signature patch_constant_signature; ++ + unsigned int input_control_point_count, output_control_point_count; + unsigned int block_count; + unsigned int temp_count; +@@ -1302,6 +1338,10 @@ struct vsir_program + + bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); + void vsir_program_cleanup(struct vsir_program *program); ++enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); ++enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, ++ const char *source_name, struct vkd3d_shader_message_context *message_context); + + static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( + struct vsir_program *program, unsigned int count) +@@ -1333,6 +1373,9 @@ struct vkd3d_shader_parser_ops + void (*parser_destroy)(struct vkd3d_shader_parser *parser); + }; + ++int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, ++ const struct vkd3d_shader_compile_info 
*compile_info, struct vkd3d_shader_code *out, ++ struct vkd3d_shader_message_context *message_context); + void vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); + bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, +@@ -1347,6 +1390,12 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse + parser->ops->parser_destroy(parser); + } + ++static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser) ++{ ++ return vsir_program_validate(&parser->program, parser->config_flags, ++ parser->location.source_name, parser->message_context); ++} ++ + struct vkd3d_shader_descriptor_info1 + { + enum vkd3d_shader_descriptor_type type; +@@ -1385,21 +1434,22 @@ struct vkd3d_string_buffer_cache + size_t count, max_count, capacity; + }; + +-enum vsir_asm_dialect ++enum vsir_asm_flags + { +- VSIR_ASM_VSIR, +- VSIR_ASM_D3D, ++ VSIR_ASM_FLAG_NONE = 0, ++ VSIR_ASM_FLAG_DUMP_TYPES = 0x1, + }; + +-enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, ++enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_code *out, enum vsir_asm_dialect dialect); ++ struct vkd3d_shader_code *out, enum vsir_asm_flags flags); + void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); + struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); + void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); + void vkd3d_string_buffer_cache_cleanup(struct vkd3d_string_buffer_cache *list); + void vkd3d_string_buffer_cache_init(struct vkd3d_string_buffer_cache *list); + void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer); ++void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size); + int vkd3d_string_buffer_print_f32(struct 
vkd3d_string_buffer *buffer, float f); + int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d); + int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) VKD3D_PRINTF_FUNC(2, 3); +@@ -1408,6 +1458,7 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *list, struct + vkd3d_string_buffer_trace_(buffer, __FUNCTION__) + void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function); + int vkd3d_string_buffer_vprintf(struct vkd3d_string_buffer *buffer, const char *format, va_list args); ++void vkd3d_shader_code_from_string_buffer(struct vkd3d_shader_code *code, struct vkd3d_string_buffer *buffer); + + struct vkd3d_bytecode_buffer + { +@@ -1483,20 +1534,16 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); + +-void free_shader_desc(struct vkd3d_shader_desc *desc); ++void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); + + int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, +- struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); ++ struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc); + int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); + +-struct vkd3d_glsl_generator; +- +-struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, +- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); +-int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, +- struct vsir_program *program, struct 
vkd3d_shader_code *out); +-void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); ++int glsl_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, ++ struct vkd3d_shader_message_context *message_context); + + #define SPIRV_MAX_SRC_COUNT 6 + +@@ -1513,17 +1560,17 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, + int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + +-enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser); +- + static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_type( + enum vkd3d_data_type data_type) + { + switch (data_type) + { ++ case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ + case VKD3D_DATA_FLOAT: + case VKD3D_DATA_UNORM: + case VKD3D_DATA_SNORM: + return VKD3D_SHADER_COMPONENT_FLOAT; ++ case VKD3D_DATA_UINT16: /* Minimum precision. 
TODO: native 16-bit */ + case VKD3D_DATA_UINT: + return VKD3D_SHADER_COMPONENT_UINT; + case VKD3D_DATA_INT: +@@ -1585,6 +1632,11 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resourc + } + } + ++static inline bool component_type_is_64_bit(enum vkd3d_shader_component_type component_type) ++{ ++ return component_type == VKD3D_SHADER_COMPONENT_DOUBLE || component_type == VKD3D_SHADER_COMPONENT_UINT64; ++} ++ + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, + unsigned int index); + +@@ -1724,6 +1776,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ + return compacted_swizzle; + } + ++static inline uint32_t vsir_swizzle_from_writemask(unsigned int writemask) ++{ ++ static const unsigned int swizzles[16] = ++ { ++ 0, ++ VKD3D_SHADER_SWIZZLE(X, X, X, X), ++ VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), ++ VKD3D_SHADER_SWIZZLE(X, Y, X, X), ++ VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), ++ VKD3D_SHADER_SWIZZLE(X, Z, X, X), ++ VKD3D_SHADER_SWIZZLE(Y, Z, X, X), ++ VKD3D_SHADER_SWIZZLE(X, Y, Z, X), ++ VKD3D_SHADER_SWIZZLE(W, W, W, W), ++ VKD3D_SHADER_SWIZZLE(X, W, X, X), ++ VKD3D_SHADER_SWIZZLE(Y, W, X, X), ++ VKD3D_SHADER_SWIZZLE(X, Y, W, X), ++ VKD3D_SHADER_SWIZZLE(Z, W, X, X), ++ VKD3D_SHADER_SWIZZLE(X, Z, W, X), ++ VKD3D_SHADER_SWIZZLE(Y, Z, W, X), ++ VKD3D_SHADER_SWIZZLE(X, Y, Z, W), ++ }; ++ ++ return swizzles[writemask & 0xf]; ++} ++ + struct vkd3d_struct + { + enum vkd3d_shader_structure_type type; +@@ -1760,7 +1837,4 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void + void dxbc_writer_init(struct dxbc_writer *dxbc); + int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); + +-enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_compile_info *compile_info); +- + #endif /* __VKD3D_SHADER_PRIVATE_H */ +diff --git a/libs/vkd3d/libs/vkd3d/cache.c 
b/libs/vkd3d/libs/vkd3d/cache.c +new file mode 100644 +index 00000000000..56ba6990420 +--- /dev/null ++++ b/libs/vkd3d/libs/vkd3d/cache.c +@@ -0,0 +1,59 @@ ++/* ++ * Copyright 2024 Stefan Dösinger for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#include "vkd3d_private.h" ++ ++struct vkd3d_shader_cache ++{ ++ unsigned int refcount; ++}; ++ ++int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) ++{ ++ struct vkd3d_shader_cache *object; ++ ++ TRACE("%p.\n", cache); ++ ++ object = vkd3d_malloc(sizeof(*object)); ++ if (!object) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ object->refcount = 1; ++ *cache = object; ++ ++ return VKD3D_OK; ++} ++ ++unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache) ++{ ++ unsigned int refcount = vkd3d_atomic_increment_u32(&cache->refcount); ++ TRACE("cache %p refcount %u.\n", cache, refcount); ++ return refcount; ++} ++ ++unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) ++{ ++ unsigned int refcount = vkd3d_atomic_decrement_u32(&cache->refcount); ++ TRACE("cache %p refcount %u.\n", cache, refcount); ++ ++ if (refcount) ++ return refcount; ++ ++ vkd3d_free(cache); ++ return 0; ++} +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c 
+index 7115a74a6f2..4a69ff530da 100644 +--- a/libs/vkd3d/libs/vkd3d/command.c ++++ b/libs/vkd3d/libs/vkd3d/command.c +@@ -5414,6 +5414,26 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 + | ((colour->uint32[2] & 0x3ff) << 22); + return vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); + ++ case DXGI_FORMAT_B5G6R5_UNORM: ++ colour->uint32[0] = (colour->uint32[2] & 0x1f) ++ | ((colour->uint32[1] & 0x3f) << 5) ++ | ((colour->uint32[0] & 0x1f) << 11); ++ return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); ++ ++ case DXGI_FORMAT_B5G5R5A1_UNORM: ++ colour->uint32[0] = (colour->uint32[2] & 0x1f) ++ | ((colour->uint32[1] & 0x1f) << 5) ++ | ((colour->uint32[0] & 0x1f) << 10) ++ | ((colour->uint32[3] & 0x1) << 15); ++ return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); ++ ++ case DXGI_FORMAT_B4G4R4A4_UNORM: ++ colour->uint32[0] = (colour->uint32[2] & 0xf) ++ | ((colour->uint32[1] & 0xf) << 4) ++ | ((colour->uint32[0] & 0xf) << 8) ++ | ((colour->uint32[3] & 0xf) << 12); ++ return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); ++ + default: + return NULL; + } +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index 17c7ccb3e31..a394e3f7592 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -89,11 +89,13 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = + VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), + VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), + /* EXT extensions */ ++ VK_EXTENSION(EXT_4444_FORMATS, EXT_4444_formats), + VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), + VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), + VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), + VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), + VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), ++ 
VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), + VK_EXTENSION(EXT_MUTABLE_DESCRIPTOR_TYPE, EXT_mutable_descriptor_type), + VK_EXTENSION(EXT_ROBUSTNESS_2, EXT_robustness2), + VK_EXTENSION(EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, EXT_shader_demote_to_helper_invocation), +@@ -557,12 +559,14 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, + const struct vkd3d_optional_instance_extensions_info *optional_extensions; + const struct vkd3d_application_info *vkd3d_application_info; + const struct vkd3d_host_time_domain_info *time_domain_info; ++ PFN_vkEnumerateInstanceVersion vkEnumerateInstanceVersion; + bool *user_extension_supported = NULL; + VkApplicationInfo application_info; + VkInstanceCreateInfo instance_info; + char application_name[PATH_MAX]; + uint32_t extension_count; + const char **extensions; ++ uint32_t vk_api_version; + VkInstance vk_instance; + VkResult vr; + HRESULT hr; +@@ -615,6 +619,16 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, + application_info.apiVersion = VK_API_VERSION_1_0; + instance->api_version = VKD3D_API_VERSION_1_0; + ++ /* vkEnumerateInstanceVersion was added in Vulkan 1.1, and its absence indicates only 1.0 is supported. 
*/ ++ vkEnumerateInstanceVersion = (void *)vk_global_procs->vkGetInstanceProcAddr(NULL, "vkEnumerateInstanceVersion"); ++ if (vkEnumerateInstanceVersion && vkEnumerateInstanceVersion(&vk_api_version) >= 0 ++ && vk_api_version >= VK_API_VERSION_1_1) ++ { ++ TRACE("Vulkan API version 1.1 is available; requesting it.\n"); ++ application_info.apiVersion = VK_API_VERSION_1_1; ++ } ++ instance->vk_api_version = application_info.apiVersion; ++ + if ((vkd3d_application_info = vkd3d_find_struct(create_info->next, APPLICATION_INFO))) + { + if (vkd3d_application_info->application_name) +@@ -789,6 +803,7 @@ struct vkd3d_physical_device_info + VkPhysicalDeviceConditionalRenderingFeaturesEXT conditional_rendering_features; + VkPhysicalDeviceDepthClipEnableFeaturesEXT depth_clip_features; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features; ++ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_features; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2_features; + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote_features; + VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features; +@@ -796,6 +811,7 @@ struct vkd3d_physical_device_info + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features; + VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features; + VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT mutable_features; ++ VkPhysicalDevice4444FormatsFeaturesEXT formats4444_features; + + VkPhysicalDeviceFeatures2 features2; + }; +@@ -808,6 +824,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; ++ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; 
+ VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; + VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; +@@ -818,6 +835,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; + VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties; + VkPhysicalDevice physical_device = device->vk_physical_device; ++ VkPhysicalDevice4444FormatsFeaturesEXT *formats4444_features; + VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; + struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; + +@@ -825,6 +843,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + conditional_rendering_features = &info->conditional_rendering_features; + depth_clip_features = &info->depth_clip_features; + descriptor_indexing_features = &info->descriptor_indexing_features; ++ fragment_shader_interlock_features = &info->fragment_shader_interlock_features; + robustness2_features = &info->robustness2_features; + descriptor_indexing_properties = &info->descriptor_indexing_properties; + maintenance3_properties = &info->maintenance3_properties; +@@ -835,6 +854,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + vertex_divisor_properties = &info->vertex_divisor_properties; + timeline_semaphore_features = &info->timeline_semaphore_features; + mutable_features = &info->mutable_features; ++ formats4444_features = &info->formats4444_features; + xfb_features = &info->xfb_features; + xfb_properties = &info->xfb_properties; + +@@ -846,6 +866,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + vk_prepend_struct(&info->features2, depth_clip_features); + descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; + 
vk_prepend_struct(&info->features2, descriptor_indexing_features); ++ fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, fragment_shader_interlock_features); + robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + vk_prepend_struct(&info->features2, robustness2_features); + demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; +@@ -860,6 +882,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + vk_prepend_struct(&info->features2, timeline_semaphore_features); + mutable_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; + vk_prepend_struct(&info->features2, mutable_features); ++ formats4444_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, formats4444_features); + + if (vulkan_info->KHR_get_physical_device_properties2) + VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); +@@ -1158,6 +1182,7 @@ static void vkd3d_trace_physical_device_limits(const struct vkd3d_physical_devic + + static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_device_info *info) + { ++ const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; + const VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; + const VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; + const VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; +@@ -1279,6 +1304,15 @@ static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_dev + TRACE(" VkPhysicalDeviceDepthClipEnableFeaturesEXT:\n"); + TRACE(" depthClipEnable: %#x.\n", depth_clip_features->depthClipEnable); + ++ fragment_shader_interlock_features = 
&info->fragment_shader_interlock_features; ++ TRACE(" VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT:\n"); ++ TRACE(" fragmentShaderSampleInterlock: %#x.\n.", ++ fragment_shader_interlock_features->fragmentShaderSampleInterlock); ++ TRACE(" fragmentShaderPixelInterlock: %#x\n.", ++ fragment_shader_interlock_features->fragmentShaderPixelInterlock); ++ TRACE(" fragmentShaderShadingRateInterlock: %#x\n.", ++ fragment_shader_interlock_features->fragmentShaderShadingRateInterlock); ++ + demote_features = &info->demote_features; + TRACE(" VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT:\n"); + TRACE(" shaderDemoteToHelperInvocation: %#x.\n", demote_features->shaderDemoteToHelperInvocation); +@@ -1476,6 +1510,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + uint32_t *device_extension_count, bool **user_extension_supported) + { + const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; ++ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock; + const struct vkd3d_optional_device_extensions_info *optional_extensions; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; + VkPhysicalDevice physical_device = device->vk_physical_device; +@@ -1539,8 +1574,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + + device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageReadWithoutFormat + && d3d12_device_supports_typed_uav_load_additional_formats(device); +- /* GL_INTEL_fragment_shader_ordering, no Vulkan equivalent. */ +- device->feature_options.ROVsSupported = FALSE; + /* GL_INTEL_conservative_rasterization, no Vulkan equivalent. 
*/ + device->feature_options.ConservativeRasterizationTier = D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED; + device->feature_options.MaxGPUVirtualAddressBitsPerResource = 40; /* FIXME */ +@@ -1619,6 +1652,12 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + *user_extension_supported, vulkan_info, "device", + device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); + ++ fragment_shader_interlock = &physical_device_info->fragment_shader_interlock_features; ++ if (!fragment_shader_interlock->fragmentShaderSampleInterlock ++ || !fragment_shader_interlock->fragmentShaderPixelInterlock) ++ vulkan_info->EXT_fragment_shader_interlock = false; ++ device->feature_options.ROVsSupported = vulkan_info->EXT_fragment_shader_interlock; ++ + if (!physical_device_info->conditional_rendering_features.conditionalRendering) + vulkan_info->EXT_conditional_rendering = false; + if (!physical_device_info->depth_clip_features.depthClipEnable) +@@ -1634,6 +1673,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + if (!physical_device_info->timeline_semaphore_features.timelineSemaphore) + vulkan_info->KHR_timeline_semaphore = false; + ++ physical_device_info->formats4444_features.formatA4B4G4R4 = VK_FALSE; ++ + vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties; + + if (get_spec_version(vk_extensions, count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3) +@@ -1675,6 +1716,10 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] + = VKD3D_SHADER_SPIRV_EXTENSION_EXT_DESCRIPTOR_INDEXING; + ++ if (vulkan_info->EXT_fragment_shader_interlock) ++ vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] ++ = VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK; ++ + if (vulkan_info->EXT_shader_stencil_export) + 
vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] + = VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT; +@@ -2498,18 +2543,286 @@ static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cach + } + } + ++/* ID3D12ShaderCacheSession */ ++struct d3d12_cache_session ++{ ++ ID3D12ShaderCacheSession ID3D12ShaderCacheSession_iface; ++ unsigned int refcount; ++ ++ struct list cache_list_entry; ++ ++ struct d3d12_device *device; ++ struct vkd3d_private_store private_store; ++ D3D12_SHADER_CACHE_SESSION_DESC desc; ++ struct vkd3d_shader_cache *cache; ++}; ++ ++static struct vkd3d_mutex cache_list_mutex = VKD3D_MUTEX_INITIALIZER; ++static struct list cache_list = LIST_INIT(cache_list); ++ ++static inline struct d3d12_cache_session *impl_from_ID3D12ShaderCacheSession(ID3D12ShaderCacheSession *iface) ++{ ++ return CONTAINING_RECORD(iface, struct d3d12_cache_session, ID3D12ShaderCacheSession_iface); ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_cache_session_QueryInterface(ID3D12ShaderCacheSession *iface, ++ REFIID iid, void **object) ++{ ++ TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); ++ ++ if (!object) ++ { ++ WARN("Output pointer is NULL, returning E_POINTER.\n"); ++ return E_POINTER; ++ } ++ ++ if (IsEqualGUID(iid, &IID_ID3D12ShaderCacheSession) ++ || IsEqualGUID(iid, &IID_ID3D12DeviceChild) ++ || IsEqualGUID(iid, &IID_ID3D12Object) ++ || IsEqualGUID(iid, &IID_IUnknown)) ++ { ++ ID3D12ShaderCacheSession_AddRef(iface); ++ *object = iface; ++ return S_OK; ++ } ++ ++ WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); ++ ++ *object = NULL; ++ return E_NOINTERFACE; ++} ++ ++static ULONG STDMETHODCALLTYPE d3d12_cache_session_AddRef(ID3D12ShaderCacheSession *iface) ++{ ++ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); ++ unsigned int refcount = vkd3d_atomic_increment_u32(&session->refcount); ++ ++ TRACE("%p increasing refcount to %u.\n", session, 
refcount); ++ ++ return refcount; ++} ++ ++static void d3d12_cache_session_destroy(struct d3d12_cache_session *session) ++{ ++ struct d3d12_device *device = session->device; ++ ++ TRACE("Destroying cache session %p.\n", session); ++ ++ vkd3d_mutex_lock(&cache_list_mutex); ++ list_remove(&session->cache_list_entry); ++ vkd3d_mutex_unlock(&cache_list_mutex); ++ ++ vkd3d_shader_cache_decref(session->cache); ++ vkd3d_private_store_destroy(&session->private_store); ++ vkd3d_free(session); ++ ++ d3d12_device_release(device); ++} ++ ++static ULONG STDMETHODCALLTYPE d3d12_cache_session_Release(ID3D12ShaderCacheSession *iface) ++{ ++ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); ++ unsigned int refcount = vkd3d_atomic_decrement_u32(&session->refcount); ++ ++ TRACE("%p decreasing refcount to %u.\n", session, refcount); ++ ++ if (!refcount) ++ d3d12_cache_session_destroy(session); ++ ++ return refcount; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_cache_session_GetPrivateData(ID3D12ShaderCacheSession *iface, ++ REFGUID guid, UINT *data_size, void *data) ++{ ++ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); ++ ++ TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); ++ ++ return vkd3d_get_private_data(&session->private_store, guid, data_size, data); ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetPrivateData(ID3D12ShaderCacheSession *iface, ++ REFGUID guid, UINT data_size, const void *data) ++{ ++ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); ++ ++ TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); ++ ++ return vkd3d_set_private_data(&session->private_store, guid, data_size, data); ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetPrivateDataInterface( ++ ID3D12ShaderCacheSession *iface, REFGUID guid, const IUnknown *data) ++{ ++ struct 
d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); ++ ++ TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); ++ ++ return vkd3d_set_private_data_interface(&session->private_store, guid, data); ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetName(ID3D12ShaderCacheSession *iface, ++ const WCHAR *name) ++{ ++ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); ++ ++ TRACE("iface %p, name %s.\n", iface, debugstr_w(name, session->device->wchar_size)); ++ ++ return name ? S_OK : E_INVALIDARG; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_cache_session_GetDevice(ID3D12ShaderCacheSession *iface, ++ REFIID iid, void **device) ++{ ++ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); ++ ++ TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); ++ ++ return d3d12_device_query_interface(session->device, iid, device); ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_cache_session_FindValue(ID3D12ShaderCacheSession *iface, ++ const void *key, UINT key_size, void *value, UINT *value_size) ++{ ++ FIXME("iface %p, key %p, key_size %#x, value %p, value_size %p stub!\n", ++ iface, key, key_size, value, value_size); ++ ++ return DXGI_ERROR_NOT_FOUND; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_cache_session_StoreValue(ID3D12ShaderCacheSession *iface, ++ const void *key, UINT key_size, const void *value, UINT value_size) ++{ ++ FIXME("iface %p, key %p, key_size %#x, value %p, value_size %u stub!\n", iface, key, key_size, ++ value, value_size); ++ ++ return E_NOTIMPL; ++} ++ ++static void STDMETHODCALLTYPE d3d12_cache_session_SetDeleteOnDestroy(ID3D12ShaderCacheSession *iface) ++{ ++ FIXME("iface %p stub!\n", iface); ++} ++ ++static D3D12_SHADER_CACHE_SESSION_DESC * STDMETHODCALLTYPE d3d12_cache_session_GetDesc( ++ ID3D12ShaderCacheSession *iface, D3D12_SHADER_CACHE_SESSION_DESC *desc) ++{ ++ struct d3d12_cache_session *session = 
impl_from_ID3D12ShaderCacheSession(iface); ++ ++ TRACE("iface %p.\n", iface); ++ *desc = session->desc; ++ return desc; ++} ++ ++static const struct ID3D12ShaderCacheSessionVtbl d3d12_cache_session_vtbl = ++{ ++ /* IUnknown methods */ ++ d3d12_cache_session_QueryInterface, ++ d3d12_cache_session_AddRef, ++ d3d12_cache_session_Release, ++ /* ID3D12Object methods */ ++ d3d12_cache_session_GetPrivateData, ++ d3d12_cache_session_SetPrivateData, ++ d3d12_cache_session_SetPrivateDataInterface, ++ d3d12_cache_session_SetName, ++ /* ID3D12DeviceChild methods */ ++ d3d12_cache_session_GetDevice, ++ /* ID3D12ShaderCacheSession methods */ ++ d3d12_cache_session_FindValue, ++ d3d12_cache_session_StoreValue, ++ d3d12_cache_session_SetDeleteOnDestroy, ++ d3d12_cache_session_GetDesc, ++}; ++ ++static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session, ++ struct d3d12_device *device, const D3D12_SHADER_CACHE_SESSION_DESC *desc) ++{ ++ struct d3d12_cache_session *i; ++ enum vkd3d_result ret; ++ HRESULT hr; ++ ++ session->ID3D12ShaderCacheSession_iface.lpVtbl = &d3d12_cache_session_vtbl; ++ session->refcount = 1; ++ session->desc = *desc; ++ session->cache = NULL; ++ ++ if (!session->desc.MaximumValueFileSizeBytes) ++ session->desc.MaximumValueFileSizeBytes = 128 * 1024 * 1024; ++ if (!session->desc.MaximumInMemoryCacheSizeBytes) ++ session->desc.MaximumInMemoryCacheSizeBytes = 1024 * 1024; ++ if (!session->desc.MaximumInMemoryCacheEntries) ++ session->desc.MaximumInMemoryCacheEntries = 128; ++ ++ if (FAILED(hr = vkd3d_private_store_init(&session->private_store))) ++ return hr; ++ ++ vkd3d_mutex_lock(&cache_list_mutex); ++ ++ /* We expect the number of open caches to be small. 
*/ ++ LIST_FOR_EACH_ENTRY(i, &cache_list, struct d3d12_cache_session, cache_list_entry) ++ { ++ if (!memcmp(&i->desc.Identifier, &desc->Identifier, sizeof(desc->Identifier))) ++ { ++ TRACE("Found an existing cache %p from session %p.\n", i->cache, i); ++ if (desc->Version == i->desc.Version) ++ { ++ session->desc = i->desc; ++ vkd3d_shader_cache_incref(session->cache = i->cache); ++ break; ++ } ++ else ++ { ++ WARN("version mismatch: Existing %"PRIu64" new %"PRIu64".\n", ++ i->desc.Version, desc->Version); ++ hr = DXGI_ERROR_ALREADY_EXISTS; ++ goto error; ++ } ++ } ++ } ++ ++ if (!session->cache) ++ { ++ if (session->desc.Mode == D3D12_SHADER_CACHE_MODE_DISK) ++ FIXME("Disk caches are not yet implemented.\n"); ++ ++ ret = vkd3d_shader_open_cache(&session->cache); ++ if (ret) ++ { ++ WARN("Failed to open shader cache.\n"); ++ hr = hresult_from_vkd3d_result(ret); ++ goto error; ++ } ++ } ++ ++ /* Add it to the list even if we reused an existing cache. The other session might be destroyed, ++ * but the cache stays alive and can be opened a third time. 
*/ ++ list_add_tail(&cache_list, &session->cache_list_entry); ++ d3d12_device_add_ref(session->device = device); ++ ++ vkd3d_mutex_unlock(&cache_list_mutex); ++ return S_OK; ++ ++error: ++ vkd3d_private_store_destroy(&session->private_store); ++ vkd3d_mutex_unlock(&cache_list_mutex); ++ return hr; ++} ++ + /* ID3D12Device */ +-static inline struct d3d12_device *impl_from_ID3D12Device7(ID3D12Device7 *iface) ++static inline struct d3d12_device *impl_from_ID3D12Device9(ID3D12Device9 *iface) + { +- return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device7_iface); ++ return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device9_iface); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device9 *iface, + REFIID riid, void **object) + { + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + +- if (IsEqualGUID(riid, &IID_ID3D12Device7) ++ if (IsEqualGUID(riid, &IID_ID3D12Device9) ++ || IsEqualGUID(riid, &IID_ID3D12Device8) ++ || IsEqualGUID(riid, &IID_ID3D12Device7) + || IsEqualGUID(riid, &IID_ID3D12Device6) + || IsEqualGUID(riid, &IID_ID3D12Device5) + || IsEqualGUID(riid, &IID_ID3D12Device4) +@@ -2531,9 +2844,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *ifac + return E_NOINTERFACE; + } + +-static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device7 *iface) ++static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device9 *iface) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + unsigned int refcount = vkd3d_atomic_increment_u32(&device->refcount); + + TRACE("%p increasing refcount to %u.\n", device, refcount); +@@ -2563,9 +2876,9 @@ static HRESULT device_worker_stop(struct d3d12_device *device) + return S_OK; + } + +-static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) ++static 
ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device9 *iface) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + unsigned int refcount = vkd3d_atomic_decrement_u32(&device->refcount); + + TRACE("%p decreasing refcount to %u.\n", device, refcount); +@@ -2602,10 +2915,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) + return refcount; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device9 *iface, + REFGUID guid, UINT *data_size, void *data) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -2613,10 +2926,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *ifac + return vkd3d_get_private_data(&device->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device9 *iface, + REFGUID guid, UINT data_size, const void *data) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -2624,19 +2937,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *ifac + return vkd3d_set_private_data(&device->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device9 *iface, + REFGUID guid, const IUnknown *data) + { +- struct 
d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&device->private_store, guid, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, const WCHAR *name) ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device9 *iface, const WCHAR *name) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size)); + +@@ -2644,17 +2957,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, cons + VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); + } + +-static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device7 *iface) ++static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device9 *iface) + { + TRACE("iface %p.\n", iface); + + return 1; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device9 *iface, + const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_command_queue *object; + HRESULT hr; + +@@ -2668,10 +2981,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 * + riid, command_queue); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device9 *iface, + D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device 
*device = impl_from_ID3D12Device9(iface); + struct d3d12_command_allocator *object; + HRESULT hr; + +@@ -2685,10 +2998,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic + riid, command_allocator); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device9 *iface, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_pipeline_state *object; + HRESULT hr; + +@@ -2702,10 +3015,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 + &IID_ID3D12PipelineState, riid, pipeline_state); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device9 *iface, + const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_pipeline_state *object; + HRESULT hr; + +@@ -2719,11 +3032,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D + &IID_ID3D12PipelineState, riid, pipeline_state); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device9 *iface, + UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, + ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct 
d3d12_command_list *object; + HRESULT hr; + +@@ -2846,10 +3159,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) + return true; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 *iface, + D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", + iface, feature, feature_data, feature_data_size); +@@ -3521,10 +3834,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device9 *iface, + const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_descriptor_heap *object; + HRESULT hr; + +@@ -3538,7 +3851,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device7 + &IID_ID3D12DescriptorHeap, riid, descriptor_heap); + } + +-static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device7 *iface, ++static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device9 *iface, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) + { + TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); +@@ -3561,11 +3874,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateRootSignature(ID3D12Device9 *iface, + UINT node_mask, const void *bytecode, SIZE_T bytecode_length, + REFIID riid, void **root_signature) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_root_signature *object; + HRESULT hr; + +@@ -3581,10 +3894,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 + &IID_ID3D12RootSignature, riid, root_signature); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device7 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device9 *iface, + const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); +@@ -3593,11 +3906,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device7 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device9 *iface, + ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, resource %p, desc %p, descriptor %s.\n", +@@ -3607,11 +3920,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE 
d3d12_device_CreateUnorderedAccessView(ID3D12Device7 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device9 *iface, + ID3D12Resource *resource, ID3D12Resource *counter_resource, + const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %s.\n", +@@ -3622,7 +3935,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device9 *iface, + ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +@@ -3630,10 +3943,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 + iface, resource, desc, debug_cpu_handle(descriptor)); + + d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), +- impl_from_ID3D12Device7(iface), unsafe_impl_from_ID3D12Resource(resource), desc); ++ impl_from_ID3D12Device9(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device9 *iface, + ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +@@ -3641,13 +3954,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 + iface, resource, desc, debug_cpu_handle(descriptor)); + + d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), +- 
impl_from_ID3D12Device7(iface), unsafe_impl_from_ID3D12Resource(resource), desc); ++ impl_from_ID3D12Device9(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device9 *iface, + const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); +@@ -3656,14 +3969,14 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device7 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device9 *iface, + UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, + const UINT *dst_descriptor_range_sizes, + UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, + const UINT *src_descriptor_range_sizes, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; + unsigned int dst_range_size, src_range_size; + struct d3d12_descriptor_heap *dst_heap; +@@ -3719,7 +4032,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device7 *iface, + } + } + +-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device7 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device9 *iface, + UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE 
dst_descriptor_range_offset, + const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) +@@ -3850,10 +4163,10 @@ static void d3d12_device_get_resource_allocation_info(struct d3d12_device *devic + } + + static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( +- ID3D12Device7 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, ++ ID3D12Device9 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + UINT count, const D3D12_RESOURCE_DESC *resource_descs) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p.\n", + iface, info, visible_mask, count, resource_descs); +@@ -3865,10 +4178,10 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour + return info; + } + +-static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device7 *iface, ++static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device9 *iface, + D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + bool coherent; + + TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", +@@ -3908,12 +4221,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope + return heap_properties; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device9 *iface, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const 
D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + D3D12_RESOURCE_DESC1 resource_desc; + struct d3d12_resource *object; + HRESULT hr; +@@ -3935,10 +4248,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi + return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device9 *iface, + const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_heap *object; + HRESULT hr; + +@@ -3954,12 +4267,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, + return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device9 *iface, + ID3D12Heap *heap, UINT64 heap_offset, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + D3D12_RESOURCE_DESC1 resource_desc; + struct d3d12_heap *heap_object; + struct d3d12_resource *object; +@@ -3980,11 +4293,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device7 + return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device7 *iface, 
++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device9 *iface, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + D3D12_RESOURCE_DESC1 resource_desc; + struct d3d12_resource *object; + HRESULT hr; +@@ -4001,11 +4314,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic + return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device9 *iface, + ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, + const WCHAR *name, HANDLE *handle) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", + iface, object, attributes, (uint32_t)access, debugstr_w(name, device->wchar_size), handle); +@@ -4013,7 +4326,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 * + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device9 *iface, + HANDLE handle, REFIID riid, void **object) + { + FIXME("iface %p, handle %p, riid %s, object %p stub!\n", +@@ -4022,10 +4335,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device7 *if + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE 
d3d12_device_OpenSharedHandleByName(ID3D12Device9 *iface, + const WCHAR *name, DWORD access, HANDLE *handle) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + FIXME("iface %p, name %s, access %#x, handle %p stub!\n", + iface, debugstr_w(name, device->wchar_size), (uint32_t)access, handle); +@@ -4033,7 +4346,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device9 *iface, + UINT object_count, ID3D12Pageable * const *objects) + { + ID3D12Fence *fence; +@@ -4041,17 +4354,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, + + TRACE("iface %p, object_count %u, objects %p.\n", iface, object_count, objects); + +- if (FAILED(hr = ID3D12Device7_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) ++ if (FAILED(hr = ID3D12Device9_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) + return hr; + +- hr = ID3D12Device7_EnqueueMakeResident(iface, 0, object_count, objects, fence, 1); ++ hr = ID3D12Device9_EnqueueMakeResident(iface, 0, object_count, objects, fence, 1); + if (SUCCEEDED(hr)) + ID3D12Fence_SetEventOnCompletion(fence, 1, NULL); + ID3D12Fence_Release(fence); + return hr; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device9 *iface, + UINT object_count, ID3D12Pageable * const *objects) + { + FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", +@@ -4060,10 +4373,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 *iface, + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device9 
*iface, + UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_fence *object; + HRESULT hr; + +@@ -4076,9 +4389,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, + return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device7 *iface) ++static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device9 *iface) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + TRACE("iface %p.\n", iface); + +@@ -4163,12 +4476,12 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, + *total_bytes = total; + } + +-static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 *iface, ++static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device9 *iface, + const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, + UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, + UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + D3D12_RESOURCE_DESC1 resource_desc; + + TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " +@@ -4182,10 +4495,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 * + base_offset, layouts, row_counts, row_sizes, total_bytes); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device9 *iface, + const D3D12_QUERY_HEAP_DESC *desc, 
REFIID iid, void **heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_query_heap *object; + HRESULT hr; + +@@ -4198,18 +4511,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *ifa + return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device7 *iface, BOOL enable) ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device9 *iface, BOOL enable) + { + FIXME("iface %p, enable %#x stub!\n", iface, enable); + + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device9 *iface, + const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, + REFIID iid, void **command_signature) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_command_signature *object; + HRESULT hr; + +@@ -4223,14 +4536,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic + &IID_ID3D12CommandSignature, iid, command_signature); + } + +-static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 *iface, ++static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device9 *iface, + ID3D12Resource *resource, UINT *total_tile_count, + D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, + UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, + D3D12_SUBRESOURCE_TILING *sub_resource_tilings) + { + const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = 
impl_from_ID3D12Device9(iface); + + TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " + "standard_title_shape %p, sub_resource_tiling_count %p, " +@@ -4243,9 +4556,9 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 *ifac + sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); + } + +-static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface, LUID *luid) ++static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device9 *iface, LUID *luid) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + TRACE("iface %p, luid %p.\n", iface, luid); + +@@ -4254,7 +4567,7 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface + return luid; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device9 *iface, + const void *blob, SIZE_T blob_size, REFIID iid, void **lib) + { + FIXME("iface %p, blob %p, blob_size %"PRIuPTR", iid %s, lib %p stub!\n", +@@ -4263,7 +4576,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device + return DXGI_ERROR_UNSUPPORTED; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device9 *iface, + ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, + D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) + { +@@ -4273,7 +4586,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion( + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device9 *iface, + UINT object_count, 
ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) + { + FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); +@@ -4281,10 +4594,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device9 *iface, + const D3D12_PIPELINE_STATE_STREAM_DESC *desc, REFIID iid, void **pipeline_state) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_pipeline_state *object; + HRESULT hr; + +@@ -4296,7 +4609,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 + return return_interface(&object->ID3D12PipelineState_iface, &IID_ID3D12PipelineState, iid, pipeline_state); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12Device9 *iface, + const void *address, REFIID iid, void **heap) + { + FIXME("iface %p, address %p, iid %s, heap %p stub!\n", iface, address, debugstr_guid(iid), heap); +@@ -4304,7 +4617,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12 + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device9 *iface, + HANDLE file_mapping, REFIID iid, void **heap) + { + FIXME("iface %p, file_mapping %p, iid %s, heap %p stub!\n", iface, file_mapping, debugstr_guid(iid), heap); +@@ -4312,7 +4625,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID + return E_NOTIMPL; + } + +-static HRESULT 
STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device9 *iface, + D3D12_RESIDENCY_FLAGS flags, UINT num_objects, ID3D12Pageable *const *objects, + ID3D12Fence *fence, UINT64 fence_value) + { +@@ -4323,7 +4636,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device9 *iface, + UINT node_mask, D3D12_COMMAND_LIST_TYPE type, D3D12_COMMAND_LIST_FLAGS flags, + REFIID iid, void **command_list) + { +@@ -4333,7 +4646,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 * + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device9 *iface, + const D3D12_PROTECTED_RESOURCE_SESSION_DESC *desc, REFIID iid, void **session) + { + FIXME("iface %p, desc %p, iid %s, session %p stub!\n", iface, desc, debugstr_guid(iid), session); +@@ -4341,13 +4654,13 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3 + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device9 *iface, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, + ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + D3D12_RESOURCE_DESC1 
resource_desc; + struct d3d12_resource *object; + HRESULT hr; +@@ -4369,11 +4682,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Dev + return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device9 *iface, + const D3D12_HEAP_DESC *desc, ID3D12ProtectedResourceSession *protected_session, + REFIID iid, void **heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + struct d3d12_heap *object; + HRESULT hr; + +@@ -4389,7 +4702,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, + return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device9 *iface, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, + ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) +@@ -4403,11 +4716,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Devi + } + + static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo1( +- ID3D12Device7 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, ++ ID3D12Device9 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + UINT count, const D3D12_RESOURCE_DESC *resource_descs, + D3D12_RESOURCE_ALLOCATION_INFO1 *info1) + { +- struct d3d12_device *device = impl_from_ID3D12Device7(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); + + TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, 
info1 %p.\n", + iface, info, visible_mask, count, resource_descs, info1); +@@ -4419,7 +4732,7 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour + return info; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device9 *iface, + ID3D12LifetimeOwner *owner, REFIID iid, void **tracker) + { + FIXME("iface %p, owner %p, iid %s, tracker %p stub!\n", iface, owner, debugstr_guid(iid), tracker); +@@ -4427,12 +4740,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device + return E_NOTIMPL; + } + +-static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device7 *iface) ++static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device9 *iface) + { + FIXME("iface %p stub!\n", iface); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device9 *iface, + UINT *num_meta_commands, D3D12_META_COMMAND_DESC *command_desc) + { + FIXME("iface %p, num_meta_commands %p, command_desc %p stub!\n", iface, +@@ -4441,7 +4754,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device9 *iface, + REFGUID command_id, D3D12_META_COMMAND_PARAMETER_STAGE stage, + UINT *size_in_bytes, UINT *parameter_count, + D3D12_META_COMMAND_PARAMETER_DESC *parameter_desc) +@@ -4453,7 +4766,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3 + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateMetaCommand(ID3D12Device9 *iface, + REFGUID command_id, UINT node_mask, const void *parameters_data, + SIZE_T data_size_in_bytes, REFIID iid, void **meta_command) + { +@@ -4465,7 +4778,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *i + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device9 *iface, + const D3D12_STATE_OBJECT_DESC *desc, REFIID iid, void **state_object) + { + FIXME("iface %p, desc %p, iid %s, state_object %p stub!\n", iface, desc, debugstr_guid(iid), state_object); +@@ -4473,14 +4786,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device7 *i + return E_NOTIMPL; + } + +-static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device7 *iface, ++static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device9 *iface, + const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc, + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO *info) + { + FIXME("iface %p, desc %p, info %p stub!\n", iface, desc, info); + } + +-static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device7 *iface, ++static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device9 *iface, + D3D12_SERIALIZED_DATA_TYPE data_type, const D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER *identifier) + { + FIXME("iface %p, data_type %u, identifier %p stub!\n", iface, data_type, identifier); +@@ -4488,7 +4801,7 @@ static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_Ch + return D3D12_DRIVER_MATCHING_IDENTIFIER_UNRECOGNIZED; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device7 *iface, ++static HRESULT 
STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device9 *iface, + D3D12_BACKGROUND_PROCESSING_MODE mode, D3D12_MEASUREMENTS_ACTION action, HANDLE event, + BOOL *further_measurements_desired) + { +@@ -4498,7 +4811,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12 + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *iface, ++ ++static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device9 *iface, + const D3D12_STATE_OBJECT_DESC *addition, ID3D12StateObject *state_object_to_grow_from, + REFIID riid, void **new_state_object) + { +@@ -4508,7 +4822,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *if + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device7 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device9 *iface, + const D3D12_PROTECTED_RESOURCE_SESSION_DESC1 *desc, REFIID riid, void **session) + { + FIXME("iface %p, desc %p, riid %s, session %p stub!\n", iface, desc, debugstr_guid(riid), session); +@@ -4516,7 +4830,167 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID + return E_NOTIMPL; + } + +-static const struct ID3D12Device7Vtbl d3d12_device_vtbl = ++static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo2(ID3D12Device9 *iface, ++ D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, ++ const D3D12_RESOURCE_DESC1 *resource_descs, D3D12_RESOURCE_ALLOCATION_INFO1 *info1) ++{ ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); ++ ++ TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", ++ iface, info, visible_mask, count, resource_descs, info1); ++ ++ debug_ignored_node_mask(visible_mask); ++ ++ d3d12_device_get_resource1_allocation_info(device, info1, count, 
resource_descs, info); ++ ++ return info; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2(ID3D12Device9 *iface, ++ const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC1 *desc, ++ D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, ++ ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) ++{ ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); ++ struct d3d12_resource *object; ++ HRESULT hr; ++ ++ TRACE("iface %p, heap_properties %p, heap_flags %#x, desc %p, initial_state %#x, " ++ "optimized_clear_value %p, protected_session %p, iid %s, resource %p.\n", ++ iface, heap_properties, heap_flags, desc, initial_state, ++ optimized_clear_value, protected_session, debugstr_guid(iid), resource); ++ ++ if (FAILED(hr = d3d12_committed_resource_create(device, heap_properties, heap_flags, ++ desc, initial_state, optimized_clear_value, protected_session, &object))) ++ { ++ *resource = NULL; ++ return hr; ++ } ++ ++ return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource1(ID3D12Device9 *iface, ++ ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC1 *resource_desc, ++ D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, ++ REFIID iid, void **resource) ++{ ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); ++ struct d3d12_heap *heap_object; ++ struct d3d12_resource *object; ++ HRESULT hr; ++ ++ TRACE("iface %p, heap %p, heap_offset %#"PRIx64", desc %p, initial_state %#x, " ++ "optimized_clear_value %p, iid %s, resource %p.\n", ++ iface, heap, heap_offset, resource_desc, initial_state, ++ optimized_clear_value, debugstr_guid(iid), resource); ++ ++ heap_object = unsafe_impl_from_ID3D12Heap(heap); ++ ++ if (FAILED(hr = d3d12_placed_resource_create(device, heap_object, 
heap_offset, ++ resource_desc, initial_state, optimized_clear_value, &object))) ++ return hr; ++ ++ return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); ++} ++ ++static void STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView(ID3D12Device9 *iface, ++ ID3D12Resource *target_resource, ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) ++{ ++ FIXME("iface %p, target_resource %p, feedback_resource %p, descriptor %s stub!\n", ++ iface, target_resource, feedback_resource, debug_cpu_handle(descriptor)); ++} ++ ++static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints1(ID3D12Device9 *iface, ++ const D3D12_RESOURCE_DESC1 *desc, UINT first_sub_resource, UINT sub_resource_count, ++ UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, ++ UINT64 *row_sizes, UINT64 *total_bytes) ++{ ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); ++ ++ TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " ++ "layouts %p, row_counts %p, row_sizes %p, total_bytes %p.\n", ++ iface, desc, first_sub_resource, sub_resource_count, base_offset, ++ layouts, row_counts, row_sizes, total_bytes); ++ ++ d3d12_device_get_copyable_footprints(device, desc, first_sub_resource, sub_resource_count, ++ base_offset, layouts, row_counts, row_sizes, total_bytes); ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateShaderCacheSession(ID3D12Device9 *iface, ++ const D3D12_SHADER_CACHE_SESSION_DESC *desc, REFIID iid, void **session) ++{ ++ struct d3d12_device *device = impl_from_ID3D12Device9(iface); ++ struct d3d12_cache_session *object; ++ static const GUID guid_null = {0}; ++ HRESULT hr; ++ ++ static const UINT valid_flags = D3D12_SHADER_CACHE_FLAG_DRIVER_VERSIONED ++ | D3D12_SHADER_CACHE_FLAG_USE_WORKING_DIR; ++ ++ TRACE("iface %p, desc %p, iid %s, session %p.\n", iface, desc, debugstr_guid(iid), session); ++ ++ if (!desc || 
!memcmp(&desc->Identifier, &guid_null, sizeof(desc->Identifier))) ++ { ++ WARN("No description or identifier, returning E_INVALIDARG.\n"); ++ return E_INVALIDARG; ++ } ++ if (desc->MaximumValueFileSizeBytes > 1024 * 1024 * 1024) ++ { ++ WARN("Requested size is larger than 1GiB, returning E_INVALIDARG.\n"); ++ return E_INVALIDARG; ++ } ++ if (desc->Flags & ~valid_flags) ++ { ++ WARN("Invalid flags %#x, returning E_INVALIDARG.\n", desc->Flags); ++ return E_INVALIDARG; ++ } ++ if (desc->Mode != D3D12_SHADER_CACHE_MODE_MEMORY && desc->Mode != D3D12_SHADER_CACHE_MODE_DISK) ++ { ++ WARN("Invalid mode %#x, returning E_INVALIDARG.\n", desc->Mode); ++ return E_INVALIDARG; ++ } ++ if (!session) ++ { ++ WARN("No output pointer, returning S_FALSE.\n"); ++ return S_FALSE; ++ } ++ *session = NULL; ++ ++ if (!(object = vkd3d_malloc(sizeof(*object)))) ++ return E_OUTOFMEMORY; ++ ++ if (FAILED(hr = d3d12_cache_session_init(object, device, desc))) ++ { ++ vkd3d_free(object); ++ return hr; ++ } ++ ++ hr = ID3D12ShaderCacheSession_QueryInterface(&object->ID3D12ShaderCacheSession_iface, iid, ++ session); ++ ID3D12ShaderCacheSession_Release(&object->ID3D12ShaderCacheSession_iface); ++ return hr; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_device_ShaderCacheControl(ID3D12Device9 *iface, ++ D3D12_SHADER_CACHE_KIND_FLAGS kinds, D3D12_SHADER_CACHE_CONTROL_FLAGS control) ++{ ++ FIXME("iface %p, kinds %#x control %#x stub!\n", iface, kinds, control); ++ ++ return E_NOTIMPL; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue1(ID3D12Device9 *iface, ++ const D3D12_COMMAND_QUEUE_DESC *desc, REFIID creator_id, REFIID iid, ++ void **command_queue) ++{ ++ FIXME("iface %p, desc %p, creator %s, iid %s, queue %p stub!\n", iface, desc, ++ debugstr_guid(creator_id), debugstr_guid(iid), command_queue); ++ ++ return E_NOTIMPL; ++} ++ ++static const struct ID3D12Device9Vtbl d3d12_device_vtbl = + { + /* IUnknown methods */ + d3d12_device_QueryInterface, +@@ -4596,14 +5070,24 @@ 
static const struct ID3D12Device7Vtbl d3d12_device_vtbl = + /* ID3D12Device7 methods */ + d3d12_device_AddToStateObject, + d3d12_device_CreateProtectedResourceSession1, ++ /* ID3D12Device8 methods */ ++ d3d12_device_GetResourceAllocationInfo2, ++ d3d12_device_CreateCommittedResource2, ++ d3d12_device_CreatePlacedResource1, ++ d3d12_device_CreateSamplerFeedbackUnorderedAccessView, ++ d3d12_device_GetCopyableFootprints1, ++ /* ID3D12Device9 methods */ ++ d3d12_device_CreateShaderCacheSession, ++ d3d12_device_ShaderCacheControl, ++ d3d12_device_CreateCommandQueue1, + }; + +-struct d3d12_device *unsafe_impl_from_ID3D12Device7(ID3D12Device7 *iface) ++struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface) + { + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_device_vtbl); +- return impl_from_ID3D12Device7(iface); ++ return impl_from_ID3D12Device9(iface); + } + + static void *device_worker_main(void *arg) +@@ -4646,13 +5130,15 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + const struct vkd3d_vk_device_procs *vk_procs; + HRESULT hr; + +- device->ID3D12Device7_iface.lpVtbl = &d3d12_device_vtbl; ++ device->ID3D12Device9_iface.lpVtbl = &d3d12_device_vtbl; + device->refcount = 1; + + vkd3d_instance_incref(device->vkd3d_instance = instance); + device->vk_info = instance->vk_info; + device->signal_event = instance->signal_event; + device->wchar_size = instance->wchar_size; ++ device->environment = (instance->vk_api_version >= VK_API_VERSION_1_1) ++ ? 
VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1 : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; + + device->adapter_luid = create_info->adapter_luid; + device->removed_reason = S_OK; +@@ -4894,28 +5380,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha + + IUnknown *vkd3d_get_device_parent(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); + + return d3d12_device->parent; + } + + VkDevice vkd3d_get_vk_device(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); + + return d3d12_device->vk_device; + } + + VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); + + return d3d12_device->vk_physical_device; + } + + struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); + + return d3d12_device->vkd3d_instance; + } +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index 89764d0901d..179999148bc 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -1857,6 +1857,7 @@ static bool d3d12_resource_validate_texture_alignment(const D3D12_RESOURCE_DESC1 + + HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device) + { ++ const D3D12_MIP_REGION *mip_region = &desc->SamplerFeedbackMipRegion; + const struct vkd3d_format *format; + + switch (desc->Dimension) +@@ 
-1892,6 +1893,13 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 + WARN("Invalid sample count 0.\n"); + return E_INVALIDARG; + } ++ if (desc->SampleDesc.Count > 1 ++ && !(desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))) ++ { ++ WARN("Sample count %u invalid without ALLOW_RENDER_TARGET or ALLOW_DEPTH_STENCIL.\n", ++ desc->SampleDesc.Count); ++ return E_INVALIDARG; ++ } + + if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) + { +@@ -1926,6 +1934,12 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 + + d3d12_validate_resource_flags(desc->Flags); + ++ if (mip_region->Width && mip_region->Height && mip_region->Depth) ++ { ++ FIXME("Unhandled sampler feedback mip region size (%u, %u, %u).\n", mip_region->Width, mip_region->Height, ++ mip_region->Depth); ++ } ++ + return S_OK; + } + +@@ -1989,6 +2003,11 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 + WARN("Invalid initial resource state %#x.\n", initial_state); + return E_INVALIDARG; + } ++ if (initial_state == D3D12_RESOURCE_STATE_RENDER_TARGET && !(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) ++ { ++ WARN("Invalid initial resource state %#x for non-render-target.\n", initial_state); ++ return E_INVALIDARG; ++ } + + if (optimized_clear_value && d3d12_resource_is_buffer(resource)) + { +@@ -2253,7 +2272,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, + HRESULT vkd3d_create_image_resource(ID3D12Device *device, + const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) + { +- struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device7((ID3D12Device7 *)device); ++ struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device9((ID3D12Device9 *)device); + struct d3d12_resource *object; + HRESULT hr; + +diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c 
+index 08cc110e8f7..6ba29c18004 100644 +--- a/libs/vkd3d/libs/vkd3d/state.c ++++ b/libs/vkd3d/libs/vkd3d/state.c +@@ -2045,6 +2045,9 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState + + d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); + ++ if (state->implicit_root_signature) ++ d3d12_root_signature_Release(state->implicit_root_signature); ++ + vkd3d_free(state); + + d3d12_device_release(device); +@@ -2413,8 +2416,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct vkd3d_shader_interface_info shader_interface; + struct vkd3d_shader_descriptor_offset_info offset_info; +- const struct d3d12_root_signature *root_signature; + struct vkd3d_shader_spirv_target_info target_info; ++ struct d3d12_root_signature *root_signature; + VkPipelineLayout vk_pipeline_layout; + HRESULT hr; + +@@ -2425,17 +2428,31 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st + + if (!(root_signature = unsafe_impl_from_ID3D12RootSignature(desc->root_signature))) + { +- WARN("Root signature is NULL.\n"); +- return E_INVALIDARG; ++ TRACE("Root signature is NULL, looking for an embedded signature.\n"); ++ if (FAILED(hr = d3d12_root_signature_create(device, ++ desc->cs.pShaderBytecode, desc->cs.BytecodeLength, &root_signature))) ++ { ++ WARN("Failed to find an embedded root signature, hr %s.\n", debugstr_hresult(hr)); ++ return hr; ++ } ++ state->implicit_root_signature = &root_signature->ID3D12RootSignature_iface; ++ } ++ else ++ { ++ state->implicit_root_signature = NULL; + } + + if (FAILED(hr = d3d12_pipeline_state_find_and_init_uav_counters(state, device, root_signature, + &desc->cs, VK_SHADER_STAGE_COMPUTE_BIT))) ++ { ++ if (state->implicit_root_signature) ++ d3d12_root_signature_Release(state->implicit_root_signature); + return hr; ++ } + + memset(&target_info, 0, sizeof(target_info)); + 
target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; +- target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; ++ target_info.environment = device->environment; + target_info.extensions = device->vk_info.shader_extensions; + target_info.extension_count = device->vk_info.shader_extension_count; + +@@ -2476,6 +2493,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st + { + WARN("Failed to create Vulkan compute pipeline, hr %s.\n", debugstr_hresult(hr)); + d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); ++ if (state->implicit_root_signature) ++ d3d12_root_signature_Release(state->implicit_root_signature); + return hr; + } + +@@ -2483,6 +2502,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st + { + VK_CALL(vkDestroyPipeline(device->vk_device, state->u.compute.vk_pipeline, NULL)); + d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); ++ if (state->implicit_root_signature) ++ d3d12_root_signature_Release(state->implicit_root_signature); + return hr; + } + +@@ -3156,7 +3177,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s + ps_target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; + ps_target_info.next = NULL; + ps_target_info.entry_point = "main"; +- ps_target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; ++ ps_target_info.environment = device->environment; + ps_target_info.extensions = vk_info->shader_extensions; + ps_target_info.extension_count = vk_info->shader_extension_count; + ps_target_info.parameters = ps_shader_parameters; +@@ -3186,7 +3207,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s + + memset(&target_info, 0, sizeof(target_info)); + target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; +- target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; ++ target_info.environment = device->environment; + 
target_info.extensions = vk_info->shader_extensions; + target_info.extension_count = vk_info->shader_extension_count; + +@@ -3484,6 +3505,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s + goto fail; + + state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; ++ state->implicit_root_signature = NULL; + d3d12_device_add_ref(state->device = device); + + return S_OK; +diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c +index ac79ae5ddff..58747342b5c 100644 +--- a/libs/vkd3d/libs/vkd3d/utils.c ++++ b/libs/vkd3d/libs/vkd3d/utils.c +@@ -87,6 +87,8 @@ static const struct vkd3d_format vkd3d_formats[] = + {DXGI_FORMAT_R8_SNORM, VK_FORMAT_R8_SNORM, 1, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_R8_SINT, VK_FORMAT_R8_SINT, 1, 1, 1, 1, COLOR, 1, SINT}, + {DXGI_FORMAT_A8_UNORM, VK_FORMAT_R8_UNORM, 1, 1, 1, 1, COLOR, 1}, ++ {DXGI_FORMAT_B5G6R5_UNORM, VK_FORMAT_R5G6B5_UNORM_PACK16, 2, 1, 1, 1, COLOR, 1}, ++ {DXGI_FORMAT_B5G5R5A1_UNORM, VK_FORMAT_A1R5G5B5_UNORM_PACK16, 2, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_B8G8R8X8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, + {DXGI_FORMAT_B8G8R8A8_TYPELESS, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1, TYPELESS}, +@@ -116,6 +118,9 @@ static const struct vkd3d_format vkd3d_formats[] = + {DXGI_FORMAT_BC7_UNORM_SRGB, VK_FORMAT_BC7_SRGB_BLOCK, 1, 4, 4, 16, COLOR, 1}, + }; + ++static const struct vkd3d_format format_b4g4r4a4 = ++ {DXGI_FORMAT_B4G4R4A4_UNORM, VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, 2, 1, 1, 1, COLOR, 1}; ++ + /* Each depth/stencil format is only compatible with itself in Vulkan. 
*/ + static const struct vkd3d_format vkd3d_depth_stencil_formats[] = + { +@@ -449,6 +454,11 @@ const struct vkd3d_format *vkd3d_get_format(const struct d3d12_device *device, + return &vkd3d_formats[i]; + } + ++ /* Do not check VkPhysicalDevice4444FormatsFeaturesEXT because apps ++ * should query format support, which returns more detailed info. */ ++ if (dxgi_format == format_b4g4r4a4.dxgi_format && device->vk_info.EXT_4444_formats) ++ return &format_b4g4r4a4; ++ + return NULL; + } + +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +index 7919b7d8760..29305fbdc63 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +@@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, + + if (!device) + { +- ID3D12Device_Release(&object->ID3D12Device7_iface); ++ ID3D12Device_Release(&object->ID3D12Device9_iface); + return S_FALSE; + } + +- return return_interface(&object->ID3D12Device7_iface, &IID_ID3D12Device, iid, device); ++ return return_interface(&object->ID3D12Device9_iface, &IID_ID3D12Device, iid, device); + } + + /* ID3D12RootSignatureDeserializer */ +@@ -453,11 +453,10 @@ HRESULT vkd3d_serialize_root_signature(const D3D12_ROOT_SIGNATURE_DESC *desc, + if ((ret = vkd3d_shader_serialize_root_signature(&vkd3d_desc, &dxbc, &messages)) < 0) + { + WARN("Failed to serialize root signature, vkd3d result %d.\n", ret); +- if (error_blob && messages) +- { +- if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) +- ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); +- } ++ if (!error_blob) ++ vkd3d_shader_free_messages(messages); ++ else if (messages && FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) ++ ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); + return hresult_from_vkd3d_result(ret); + } + vkd3d_shader_free_messages(messages); +@@ -494,11 +493,10 @@ HRESULT 
vkd3d_serialize_versioned_root_signature(const D3D12_VERSIONED_ROOT_SIGN + if ((ret = vkd3d_shader_serialize_root_signature(vkd3d_desc, &dxbc, &messages)) < 0) + { + WARN("Failed to serialize root signature, vkd3d result %d.\n", ret); +- if (error_blob && messages) +- { +- if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) +- ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); +- } ++ if (!error_blob) ++ vkd3d_shader_free_messages(messages); ++ else if (messages && FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) ++ ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); + return hresult_from_vkd3d_result(ret); + } + vkd3d_shader_free_messages(messages); +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index b092bb26ded..e0a7acb306d 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -55,7 +55,7 @@ + + #define VKD3D_MAX_COMPATIBLE_FORMAT_COUNT 6u + #define VKD3D_MAX_QUEUE_FAMILY_COUNT 3u +-#define VKD3D_MAX_SHADER_EXTENSIONS 4u ++#define VKD3D_MAX_SHADER_EXTENSIONS 5u + #define VKD3D_MAX_SHADER_STAGES 5u + #define VKD3D_MAX_VK_SYNC_OBJECTS 4u + #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u +@@ -128,11 +128,13 @@ struct vkd3d_vulkan_info + bool KHR_sampler_mirror_clamp_to_edge; + bool KHR_timeline_semaphore; + /* EXT device extensions */ ++ bool EXT_4444_formats; + bool EXT_calibrated_timestamps; + bool EXT_conditional_rendering; + bool EXT_debug_marker; + bool EXT_depth_clip_enable; + bool EXT_descriptor_indexing; ++ bool EXT_fragment_shader_interlock; + bool EXT_mutable_descriptor_type; + bool EXT_robustness2; + bool EXT_shader_demote_to_helper_invocation; +@@ -184,6 +186,7 @@ struct vkd3d_instance + struct vkd3d_vulkan_info vk_info; + struct vkd3d_vk_global_procs vk_global_procs; + void *libvulkan; ++ uint32_t vk_api_version; + + uint64_t config_flags; + enum vkd3d_api_version api_version; +@@ 
-202,36 +205,11 @@ union vkd3d_thread_handle + void *handle; + }; + +-struct vkd3d_mutex +-{ +- CRITICAL_SECTION lock; +-}; +- + struct vkd3d_cond + { + CONDITION_VARIABLE cond; + }; + +-static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) +-{ +- InitializeCriticalSection(&lock->lock); +-} +- +-static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) +-{ +- EnterCriticalSection(&lock->lock); +-} +- +-static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) +-{ +- LeaveCriticalSection(&lock->lock); +-} +- +-static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) +-{ +- DeleteCriticalSection(&lock->lock); +-} +- + static inline void vkd3d_cond_init(struct vkd3d_cond *cond) + { + InitializeConditionVariable(&cond->cond); +@@ -287,53 +265,11 @@ union vkd3d_thread_handle + void *handle; + }; + +-struct vkd3d_mutex +-{ +- pthread_mutex_t lock; +-}; +- + struct vkd3d_cond + { + pthread_cond_t cond; + }; + +- +-static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) +-{ +- int ret; +- +- ret = pthread_mutex_init(&lock->lock, NULL); +- if (ret) +- ERR("Could not initialize the mutex, error %d.\n", ret); +-} +- +-static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) +-{ +- int ret; +- +- ret = pthread_mutex_lock(&lock->lock); +- if (ret) +- ERR("Could not lock the mutex, error %d.\n", ret); +-} +- +-static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) +-{ +- int ret; +- +- ret = pthread_mutex_unlock(&lock->lock); +- if (ret) +- ERR("Could not unlock the mutex, error %d.\n", ret); +-} +- +-static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) +-{ +- int ret; +- +- ret = pthread_mutex_destroy(&lock->lock); +- if (ret) +- ERR("Could not destroy the mutex, error %d.\n", ret); +-} +- + static inline void vkd3d_cond_init(struct vkd3d_cond *cond) + { + int ret; +@@ -1279,6 +1215,7 @@ struct d3d12_pipeline_state + + struct d3d12_pipeline_uav_counter_state uav_counters; + ++ ID3D12RootSignature 
*implicit_root_signature; + struct d3d12_device *device; + + struct vkd3d_private_store private_store; +@@ -1735,7 +1672,7 @@ struct vkd3d_desc_object_cache + /* ID3D12Device */ + struct d3d12_device + { +- ID3D12Device7 ID3D12Device7_iface; ++ ID3D12Device9 ID3D12Device9_iface; + unsigned int refcount; + + VkDevice vk_device; +@@ -1743,6 +1680,7 @@ struct d3d12_device + struct vkd3d_vk_device_procs vk_procs; + PFN_vkd3d_signal_event signal_event; + size_t wchar_size; ++ enum vkd3d_shader_spirv_environment environment; + + struct vkd3d_gpu_va_allocator gpu_va_allocator; + +@@ -1810,29 +1748,29 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 + bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); + void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, + const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); +-struct d3d12_device *unsafe_impl_from_ID3D12Device7(ID3D12Device7 *iface); ++struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface); + HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); + void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); + + static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) + { +- return ID3D12Device7_QueryInterface(&device->ID3D12Device7_iface, iid, object); ++ return ID3D12Device9_QueryInterface(&device->ID3D12Device9_iface, iid, object); + } + + static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) + { +- return ID3D12Device7_AddRef(&device->ID3D12Device7_iface); ++ return ID3D12Device9_AddRef(&device->ID3D12Device9_iface); + } + + static inline ULONG d3d12_device_release(struct d3d12_device *device) + { +- return ID3D12Device7_Release(&device->ID3D12Device7_iface); ++ return ID3D12Device9_Release(&device->ID3D12Device9_iface); + } + + static inline unsigned int 
d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) + { +- return ID3D12Device7_GetDescriptorHandleIncrementSize(&device->ID3D12Device7_iface, descriptor_type); ++ return ID3D12Device9_GetDescriptorHandleIncrementSize(&device->ID3D12Device9_iface, descriptor_type); + } + + /* utils */ +@@ -1993,4 +1931,10 @@ static inline void vkd3d_prepend_struct(void *header, void *structure) + vkd3d_header->next = vkd3d_structure; + } + ++struct vkd3d_shader_cache; ++ ++int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache); ++unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache); ++unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache); ++ + #endif /* __VKD3D_PRIVATE_H */ +-- +2.43.0 + diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch new file mode 100644 index 00000000..01d18349 --- /dev/null +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch @@ -0,0 +1,1024 @@ +From 299d2898cbd133a56a365e62b5bb1688b0dc85a5 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Tue, 23 Apr 2024 08:01:19 +1000 +Subject: [PATCH] Updated vkd3d to 7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6eaeb4. 
+ +--- + libs/vkd3d/include/private/vkd3d_common.h | 70 ++++++ + libs/vkd3d/include/vkd3d.h | 115 +++++++++- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 205 ++++++++++++++++-- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 1 + + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 2 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 72 ++++-- + libs/vkd3d/libs/vkd3d-shader/ir.c | 14 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 5 + + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 84 ------- + 9 files changed, 430 insertions(+), 138 deletions(-) + +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index f9df47d339c..1da73bcfb2e 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -492,6 +492,76 @@ static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) + #endif + } + ++struct vkd3d_cond ++{ ++#ifdef _WIN32 ++ CONDITION_VARIABLE cond; ++#else ++ pthread_cond_t cond; ++#endif ++}; ++ ++static inline void vkd3d_cond_init(struct vkd3d_cond *cond) ++{ ++#ifdef _WIN32 ++ InitializeConditionVariable(&cond->cond); ++#else ++ int ret; ++ ++ if ((ret = pthread_cond_init(&cond->cond, NULL))) ++ ERR("Failed to initialise the condition variable, ret %d.\n", ret); ++#endif ++} ++ ++static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) ++{ ++#ifdef _WIN32 ++ WakeConditionVariable(&cond->cond); ++#else ++ int ret; ++ ++ if ((ret = pthread_cond_signal(&cond->cond))) ++ ERR("Failed to signal the condition variable, ret %d.\n", ret); ++#endif ++} ++ ++static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) ++{ ++#ifdef _WIN32 ++ WakeAllConditionVariable(&cond->cond); ++#else ++ int ret; ++ ++ if ((ret = pthread_cond_broadcast(&cond->cond))) ++ ERR("Failed to broadcast the condition variable, ret %d.\n", ret); ++#endif ++} ++ ++static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) ++{ ++#ifdef _WIN32 ++ if (!SleepConditionVariableCS(&cond->cond, &lock->lock, 
INFINITE)) ++ ERR("Failed to wait on the condition variable, error %lu.\n", GetLastError()); ++#else ++ int ret; ++ ++ if ((ret = pthread_cond_wait(&cond->cond, &lock->lock))) ++ ERR("Failed to wait on the condition variable, ret %d.\n", ret); ++#endif ++} ++ ++static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) ++{ ++#ifdef _WIN32 ++ /* Nothing to do. */ ++#else ++ int ret; ++ ++ if ((ret = pthread_cond_destroy(&cond->cond))) ++ ERR("Failed to destroy the condition variable, ret %d.\n", ret); ++#endif ++} ++ + static inline void vkd3d_parse_version(const char *version, int *major, int *minor) + { + *major = atoi(version); +diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h +index aa68b70e1bf..71c56331d86 100644 +--- a/libs/vkd3d/include/vkd3d.h ++++ b/libs/vkd3d/include/vkd3d.h +@@ -46,21 +46,37 @@ extern "C" { + * \since 1.0 + */ + ++/** The type of a chained structure. */ + enum vkd3d_structure_type + { +- /* 1.0 */ ++ /** The structure is a vkd3d_instance_create_info structure. */ + VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, ++ /** The structure is a vkd3d_device_create_info structure. */ + VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO, ++ /** The structure is a vkd3d_image_resource_create_info structure. */ + VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO, + +- /* 1.1 */ ++ /** ++ * The structure is a vkd3d_optional_instance_extensions_info structure. ++ * \since 1.1 ++ */ + VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO, + +- /* 1.2 */ ++ /** ++ * The structure is a vkd3d_optional_device_extensions_info structure. ++ * \since 1.2 ++ */ + VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO, ++ /** ++ * The structure is a vkd3d_application_info structure. ++ * \since 1.2 ++ */ + VKD3D_STRUCTURE_TYPE_APPLICATION_INFO, + +- /* 1.3 */ ++ /** ++ * The structure is a vkd3d_host_time_domain_info structure. 
++ * \since 1.3 ++ */ + VKD3D_STRUCTURE_TYPE_HOST_TIME_DOMAIN_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_STRUCTURE_TYPE), +@@ -93,54 +109,131 @@ typedef HRESULT (*PFN_vkd3d_join_thread)(void *thread); + + struct vkd3d_instance; + ++/** ++ * A chained structure containing instance creation parameters. ++ */ + struct vkd3d_instance_create_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** A pointer to a function to signal events. */ + PFN_vkd3d_signal_event pfn_signal_event; ++ /** ++ * An optional pointer to a function to create threads. If this is NULL vkd3d will use a ++ * function of its choice, depending on the platform. It must be NULL if and only if ++ * pfn_join_thread is NULL. ++ */ + PFN_vkd3d_create_thread pfn_create_thread; ++ /** ++ * An optional pointer to a function to join threads. If this is NULL vkd3d will use a ++ * function of its choice, depending on the platform. It must be NULL if and only if ++ * pfn_create_thread is NULL. ++ */ + PFN_vkd3d_join_thread pfn_join_thread; ++ /** The size of type WCHAR. It must be 2 or 4 and should normally be set to sizeof(WCHAR). */ + size_t wchar_size; + +- /* If set to NULL, libvkd3d loads libvulkan. */ ++ /** ++ * A pointer to the vkGetInstanceProcAddr Vulkan function, which will be used to load all the ++ * other Vulkan functions. If set to NULL, vkd3d will search and use the Vulkan loader. ++ */ + PFN_vkGetInstanceProcAddr pfn_vkGetInstanceProcAddr; + ++ /** ++ * A list of Vulkan instance extensions to request. They are intended as required, so instance ++ * creation will fail if any of them is not available. ++ */ + const char * const *instance_extensions; ++ /** The number of elements in the instance_extensions array. */ + uint32_t instance_extension_count; + }; + +-/* Extends vkd3d_instance_create_info. Available since 1.1. 
*/ ++/** ++ * A chained structure to specify optional instance extensions. ++ * ++ * This structure extends vkd3d_instance_create_info. ++ * ++ * \since 1.1 ++ */ + struct vkd3d_optional_instance_extensions_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** ++ * A list of optional Vulkan instance extensions to request. Instance creation does not fail if ++ * they are not available. ++ */ + const char * const *extensions; ++ /** The number of elements in the extensions array. */ + uint32_t extension_count; + }; + +-/* Extends vkd3d_instance_create_info. Available since 1.2. */ ++/** ++ * A chained structure to specify application information. ++ * ++ * This structure extends vkd3d_instance_create_info. ++ * ++ * \since 1.2 ++ */ + struct vkd3d_application_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_APPLICATION_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** ++ * The application's name, to be passed to the Vulkan implementation. If it is NULL, a name is ++ * computed from the process executable filename. If that cannot be done, the empty string is ++ * used. ++ */ + const char *application_name; ++ /** The application's version, to be passed to the Vulkan implementation. */ + uint32_t application_version; + +- const char *engine_name; /* "vkd3d" if NULL */ +- uint32_t engine_version; /* vkd3d version if engine_name is NULL */ +- ++ /** ++ * The engine name, to be passed to the Vulkan implementation. If it is NULL, "vkd3d" is used. ++ */ ++ const char *engine_name; ++ /** ++ * The engine version, to be passed to the Vulkan implementation. If it is 0, the version is ++ * computed from the vkd3d library version. 
++ */ ++ uint32_t engine_version; ++ ++ /** ++ * The vkd3d API version to use, to guarantee backward compatibility of the shared library. If ++ * this chained structure is not used then VKD3D_API_VERSION_1_0 is used. ++ */ + enum vkd3d_api_version api_version; + }; + +-/* Extends vkd3d_instance_create_info. Available since 1.3. */ ++/** ++ * A chained structure to specify the host time domain. ++ * ++ * This structure extends vkd3d_instance_create_info. ++ * ++ * \since 1.3 ++ */ + struct vkd3d_host_time_domain_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_HOST_TIME_DOMAIN_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** ++ * The number of clock ticks per second, used for GetClockCalibration(). It should normally ++ * match the expected result of QueryPerformanceFrequency(). If this chained structure is not ++ * used then 10 million is used, which means that each tick is a tenth of a microsecond, or ++ * equivalently 100 nanoseconds. 
++ */ + uint64_t ticks_per_second; + }; + +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 220ba773887..d07d5adee70 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -419,6 +419,11 @@ enum dx_intrinsic_opcode + DX_FLATTENED_THREAD_ID_IN_GROUP = 96, + DX_MAKE_DOUBLE = 101, + DX_SPLIT_DOUBLE = 102, ++ DX_LOAD_OUTPUT_CONTROL_POINT = 103, ++ DX_LOAD_PATCH_CONSTANT = 104, ++ DX_DOMAIN_LOCATION = 105, ++ DX_STORE_PATCH_CONSTANT = 106, ++ DX_OUTPUT_CONTROL_POINT_ID = 107, + DX_PRIMITIVE_ID = 108, + DX_LEGACY_F32TOF16 = 130, + DX_LEGACY_F16TOF32 = 131, +@@ -799,6 +804,7 @@ struct sm6_parser + + struct vkd3d_shader_dst_param *output_params; + struct vkd3d_shader_dst_param *input_params; ++ struct vkd3d_shader_dst_param *patch_constant_params; + uint32_t io_regs_declared[(VKD3DSPR_COUNT + 0x1f) / 0x20]; + + struct sm6_function *functions; +@@ -2433,10 +2439,12 @@ static void register_index_address_init(struct vkd3d_shader_register_index *idx, + if (sm6_value_is_constant(address)) + { + idx->offset = sm6_value_get_constant_uint(address); ++ idx->rel_addr = NULL; + } + else if (sm6_value_is_undef(address)) + { + idx->offset = 0; ++ idx->rel_addr = NULL; + } + else + { +@@ -2515,7 +2523,7 @@ static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, + * overestimate the value count somewhat, but this should be no problem. */ + value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); + sm6->value_capacity = max(sm6->value_capacity, value_count); +- sm6->functions[sm6->function_count].value_count = value_count; ++ sm6->functions[sm6->function_count++].value_count = value_count; + /* The value count returns to its previous value after handling a function. 
*/ + if (value_count < SIZE_MAX) + value_count = old_value_count; +@@ -3689,12 +3697,35 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( + } + + static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, +- enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) ++ bool is_input, enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) + { ++ enum vkd3d_shader_type shader_type = sm6->p.program.shader_version.type; ++ bool is_patch_constant, is_control_point; + struct vkd3d_shader_dst_param *param; + const struct signature_element *e; + unsigned int i, count; + ++ is_patch_constant = reg_type == VKD3DSPR_PATCHCONST; ++ ++ is_control_point = false; ++ if (!is_patch_constant) ++ { ++ switch (shader_type) ++ { ++ case VKD3D_SHADER_TYPE_DOMAIN: ++ case VKD3D_SHADER_TYPE_GEOMETRY: ++ is_control_point = is_input; ++ break; ++ ++ case VKD3D_SHADER_TYPE_HULL: ++ is_control_point = true; ++ break; ++ ++ default: ++ break; ++ } ++ } ++ + for (i = 0; i < s->element_count; ++i) + { + e = &s->elements[i]; +@@ -3709,8 +3740,18 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade + + dst_param_io_init(param, e, reg_type); + count = 0; +- if (e->register_count > 1) ++ ++ if (is_control_point) ++ { ++ if (reg_type == VKD3DSPR_OUTPUT) ++ param->reg.idx[count].rel_addr = instruction_array_create_outpointid_param(&sm6->p.program.instructions); ++ param->reg.idx[count++].offset = 0; ++ } ++ ++ if (e->register_count > 1 || (is_patch_constant && vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) + param->reg.idx[count++].offset = 0; ++ ++ assert(count < ARRAY_SIZE(param->reg.idx)); + param->reg.idx[count++].offset = i; + param->reg.idx_count = count; + } +@@ -3718,12 +3759,21 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade + + static void sm6_parser_init_output_signature(struct sm6_parser *sm6, const 
struct shader_signature *output_signature) + { +- sm6_parser_init_signature(sm6, output_signature, VKD3DSPR_OUTPUT, sm6->output_params); ++ sm6_parser_init_signature(sm6, output_signature, false, VKD3DSPR_OUTPUT, sm6->output_params); + } + + static void sm6_parser_init_input_signature(struct sm6_parser *sm6, const struct shader_signature *input_signature) + { +- sm6_parser_init_signature(sm6, input_signature, VKD3DSPR_INPUT, sm6->input_params); ++ sm6_parser_init_signature(sm6, input_signature, true, VKD3DSPR_INPUT, sm6->input_params); ++} ++ ++static void sm6_parser_init_patch_constant_signature(struct sm6_parser *sm6, ++ const struct shader_signature *patch_constant_signature) ++{ ++ bool is_input = sm6->p.program.shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; ++ ++ sm6_parser_init_signature(sm6, patch_constant_signature, is_input, VKD3DSPR_PATCHCONST, ++ sm6->patch_constant_params); + } + + static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) +@@ -4752,6 +4802,33 @@ static void sm6_parser_emit_dx_discard(struct sm6_parser *sm6, enum dx_intrinsic + src_param_init_from_value(src_param, operands[0]); + } + ++static void sm6_parser_emit_dx_domain_location(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ unsigned int component_idx; ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); ++ ++ if ((component_idx = sm6_value_get_constant_uint(operands[0])) >= 3) ++ { ++ WARN("Invalid component index %u.\n", component_idx); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid domain location component index %u.", component_idx); ++ component_idx = 0; ++ } ++ ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_INPUT, 
VKD3DSPR_TESSCOORD, VKD3D_DATA_FLOAT, 3); ++ vsir_register_init(&src_param->reg, VKD3DSPR_TESSCOORD, VKD3D_DATA_FLOAT, 0); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; ++ src_param_init_scalar(src_param, component_idx); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ + static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -4989,18 +5066,43 @@ static void sm6_parser_emit_dx_tertiary(struct sm6_parser *sm6, enum dx_intrinsi + static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { ++ bool is_control_point = op == DX_LOAD_OUTPUT_CONTROL_POINT; ++ bool is_patch_constant = op == DX_LOAD_PATCH_CONSTANT; + struct vkd3d_shader_instruction *ins = state->ins; ++ unsigned int count, row_index, column_index; ++ const struct vkd3d_shader_dst_param *params; + struct vkd3d_shader_src_param *src_param; + const struct shader_signature *signature; +- unsigned int row_index, column_index; + const struct signature_element *e; + + row_index = sm6_value_get_constant_uint(operands[0]); + column_index = sm6_value_get_constant_uint(operands[2]); + ++ if (is_control_point && operands[3]->is_undefined) ++ { ++ /* dxcompiler will compile source which does this, so let it pass. 
*/ ++ WARN("Control point id is undefined.\n"); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNDEFINED_OPERAND, ++ "The index for a control point load is undefined."); ++ } ++ + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + +- signature = &sm6->p.program.input_signature; ++ if (is_patch_constant) ++ { ++ signature = &sm6->p.program.patch_constant_signature; ++ params = sm6->patch_constant_params; ++ } ++ else if (is_control_point) ++ { ++ signature = &sm6->p.program.output_signature; ++ params = sm6->output_params; ++ } ++ else ++ { ++ signature = &sm6->p.program.input_signature; ++ params = sm6->input_params; ++ } + if (row_index >= signature->element_count) + { + WARN("Invalid row index %u.\n", row_index); +@@ -5012,10 +5114,18 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin + + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; +- src_param->reg = sm6->input_params[row_index].reg; ++ src_param->reg = params[row_index].reg; + src_param_init_scalar(src_param, column_index); ++ count = 0; ++ + if (e->register_count > 1) +- register_index_address_init(&src_param->reg.idx[0], operands[1], sm6); ++ register_index_address_init(&src_param->reg.idx[count++], operands[1], sm6); ++ ++ if (!is_patch_constant && !operands[3]->is_undefined) ++ { ++ assert(src_param->reg.idx_count > count); ++ register_index_address_init(&src_param->reg.idx[count], operands[3], sm6); ++ } + + instruction_dst_param_init_ssa_scalar(ins, sm6); + } +@@ -5040,6 +5150,12 @@ static void sm6_parser_emit_dx_make_double(struct sm6_parser *sm6, enum dx_intri + instruction_dst_param_init_ssa_scalar(ins, sm6); + } + ++static void sm6_parser_emit_dx_output_control_point_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT); ++} ++ + static 
void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -5473,6 +5589,7 @@ static void sm6_parser_emit_dx_split_double(struct sm6_parser *sm6, enum dx_intr + static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { ++ bool is_patch_constant = op == DX_STORE_PATCH_CONSTANT; + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_dst_param *dst_param; +@@ -5484,7 +5601,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr + row_index = sm6_value_get_constant_uint(operands[0]); + column_index = sm6_value_get_constant_uint(operands[2]); + +- signature = &sm6->p.program.output_signature; ++ signature = is_patch_constant ? &sm6->p.program.patch_constant_signature : &sm6->p.program.output_signature; + if (row_index >= signature->element_count) + { + WARN("Invalid row index %u.\n", row_index); +@@ -5516,7 +5633,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr + if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) + return; + dst_param_init_scalar(dst_param, column_index); +- dst_param->reg = sm6->output_params[row_index].reg; ++ dst_param->reg = is_patch_constant ? 
sm6->patch_constant_params[row_index].reg : sm6->output_params[row_index].reg; + if (e->register_count > 1) + register_index_address_init(&dst_param->reg.idx[0], operands[1], sm6); + +@@ -5736,6 +5853,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_DERIV_FINEX ] = {"e", "R", sm6_parser_emit_dx_unary}, + [DX_DERIV_FINEY ] = {"e", "R", sm6_parser_emit_dx_unary}, + [DX_DISCARD ] = {"v", "1", sm6_parser_emit_dx_discard}, ++ [DX_DOMAIN_LOCATION ] = {"f", "c", sm6_parser_emit_dx_domain_location}, + [DX_DOT2 ] = {"g", "RRRR", sm6_parser_emit_dx_dot}, + [DX_DOT3 ] = {"g", "RRRRRR", sm6_parser_emit_dx_dot}, + [DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot}, +@@ -5765,8 +5883,11 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_LEGACY_F16TOF32 ] = {"f", "i", sm6_parser_emit_dx_unary}, + [DX_LEGACY_F32TOF16 ] = {"i", "f", sm6_parser_emit_dx_unary}, + [DX_LOAD_INPUT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, ++ [DX_LOAD_OUTPUT_CONTROL_POINT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, ++ [DX_LOAD_PATCH_CONSTANT ] = {"o", "ii8", sm6_parser_emit_dx_load_input}, + [DX_LOG ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, ++ [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, + [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, + [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, + [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, +@@ -5788,6 +5909,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_SPLIT_DOUBLE ] = {"S", "d", sm6_parser_emit_dx_split_double}, + [DX_SQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_STORE_OUTPUT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, ++ [DX_STORE_PATCH_CONSTANT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, + [DX_TAN ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_TEX2DMS_GET_SAMPLE_POS ] = 
{"o", "Hi", sm6_parser_emit_dx_get_sample_pos}, + [DX_TEXTURE_GATHER ] = {"o", "HHffffiic", sm6_parser_emit_dx_texture_gather}, +@@ -7286,11 +7408,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + struct sm6_block *code_block; + struct sm6_value *dst; + +- if (sm6->function_count) +- { +- FIXME("Multiple functions are not supported yet.\n"); +- return VKD3D_ERROR_INVALID_SHADER; +- } + if (!(function->declaration = sm6_parser_next_function_definition(sm6))) + { + WARN("Failed to find definition to match function body.\n"); +@@ -8976,10 +9093,15 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons + { + return ret; + } +- /* TODO: patch constant signature in operand 2. */ ++ if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], ++ &sm6->p.program.patch_constant_signature, tessellator_domain)) < 0) ++ { ++ return ret; ++ } + + sm6_parser_init_input_signature(sm6, &sm6->p.program.input_signature); + sm6_parser_init_output_signature(sm6, &sm6->p.program.output_signature); ++ sm6_parser_init_patch_constant_signature(sm6, &sm6->p.program.patch_constant_signature); + + return VKD3D_OK; + } +@@ -9509,9 +9631,10 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 + static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *source_name, + struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) + { ++ size_t count, length, function_count, expected_function_count, byte_code_size = dxbc_desc->byte_code_size; ++ const struct shader_signature *patch_constant_signature = &sm6->p.program.patch_constant_signature; + const struct shader_signature *output_signature = &sm6->p.program.output_signature; + const struct shader_signature *input_signature = &sm6->p.program.input_signature; +- size_t count, length, function_count, byte_code_size = dxbc_desc->byte_code_size; + const struct 
vkd3d_shader_location location = {.source_name = source_name}; + uint32_t version_token, dxil_version, token_count, magic; + const uint32_t *byte_code = dxbc_desc->byte_code; +@@ -9674,7 +9797,9 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *sou + } + + if (!(sm6->output_params = vsir_program_get_dst_params(&sm6->p.program, output_signature->element_count)) +- || !(sm6->input_params = vsir_program_get_dst_params(&sm6->p.program, input_signature->element_count))) ++ || !(sm6->input_params = vsir_program_get_dst_params(&sm6->p.program, input_signature->element_count)) ++ || !(sm6->patch_constant_params = vsir_program_get_dst_params(&sm6->p.program, ++ patch_constant_signature->element_count))) + { + ERR("Failed to allocate input/output parameters.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, +@@ -9705,6 +9830,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *sou + "Out of memory allocating DXIL value array."); + return VKD3D_ERROR_OUT_OF_MEMORY; + } ++ sm6->function_count = 0; + sm6->ssa_next_id = 1; + + if ((ret = sm6_parser_globals_init(sm6)) < 0) +@@ -9754,7 +9880,8 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *sou + return ret; + } + +- if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count)) ++ if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count ++ + patch_constant_signature->element_count)) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory emitting shader signature declarations."); +@@ -9771,9 +9898,41 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *sou + return VKD3D_ERROR_INVALID_SHADER; + } + +- assert(sm6->function_count == 1); +- if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) +- return ret; ++ if (version.type == VKD3D_SHADER_TYPE_HULL) ++ { ++ 
sm6_parser_add_instruction(sm6, VKD3DSIH_HS_CONTROL_POINT_PHASE); ++ ++ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) ++ return ret; ++ ++ if (!(fn = sm6_parser_get_function(sm6, sm6->patch_constant_function))) ++ { ++ WARN("Failed to find patch constant function '%s'.\n", sm6->patch_constant_function); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, ++ "Failed to find the patch constant function '%s' for a hull shader.", ++ sm6->patch_constant_function); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ sm6_parser_add_instruction(sm6, VKD3DSIH_HS_FORK_PHASE); ++ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) ++ return ret; ++ ++ expected_function_count = 2; ++ } ++ else ++ { ++ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) ++ return ret; ++ expected_function_count = 1; ++ } ++ ++ if (sm6->function_count > expected_function_count) ++ { ++ FIXME("%zu unhandled functions.\n", sm6->function_count - expected_function_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, ++ "%zu functions were not emitted.", sm6->function_count - expected_function_count); ++ } + + dxil_block_destroy(&sm6->root_block); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index 3e8dd2c486b..3e482a5fc70 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -79,6 +79,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator + case VKD3DSIH_DCL_INPUT: + case VKD3DSIH_DCL_OUTPUT: + case VKD3DSIH_DCL_OUTPUT_SIV: ++ case VKD3DSIH_NOP: + break; + case VKD3DSIH_RET: + shader_glsl_ret(generator, instruction); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 7a8fe4de437..5e3010c4353 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -392,7 +392,7 @@ struct hlsl_attribute + struct hlsl_reg_reservation + { + char reg_type; +- 
unsigned int reg_index; ++ unsigned int reg_space, reg_index; + + char offset_type; + unsigned int offset_index; +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 0c196b77595..f99f322d8d1 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -1197,17 +1197,18 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl + return true; + } + +-static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) ++static bool parse_reservation_index(const char *string, char *type, uint32_t *index) + { +- struct hlsl_reg_reservation reservation = {0}; ++ if (!sscanf(string + 1, "%u", index)) ++ return false; + +- if (!sscanf(reg_string + 1, "%u", &reservation.reg_index)) +- { +- FIXME("Unsupported register reservation syntax.\n"); +- return reservation; +- } +- reservation.reg_type = ascii_tolower(reg_string[0]); +- return reservation; ++ *type = ascii_tolower(string[0]); ++ return true; ++} ++ ++static bool parse_reservation_space(const char *string, uint32_t *space) ++{ ++ return !ascii_strncasecmp(string, "space", 5) && sscanf(string + 5, "%u", space); + } + + static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const char *reg_string, +@@ -5675,8 +5676,8 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %type param_list + %type parameters + +-%type register_opt +-%type packoffset_opt ++%type register_reservation ++%type packoffset_reservation + + %type texture_type texture_ms_type uav_type rov_type + +@@ -6300,12 +6301,12 @@ colon_attribute: + $$.reg_reservation.reg_type = 0; + $$.reg_reservation.offset_type = 0; + } +- | register_opt ++ | register_reservation + { + $$.semantic = (struct hlsl_semantic){0}; + $$.reg_reservation = $1; + } +- | packoffset_opt ++ | packoffset_reservation + { + $$.semantic = (struct hlsl_semantic){0}; + $$.reg_reservation = $1; +@@ -6327,22 +6328,57 @@ 
semantic: + } + + /* FIXME: Writemasks */ +-register_opt: ++register_reservation: + ':' KW_REGISTER '(' any_identifier ')' + { +- $$ = parse_reg_reservation($4); ++ memset(&$$, 0, sizeof($$)); ++ if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) ++ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $4); ++ + vkd3d_free($4); + } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' + { +- FIXME("Ignoring shader target %s in a register reservation.\n", debugstr_a($4)); ++ memset(&$$, 0, sizeof($$)); ++ if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) ++ { ++ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); ++ } ++ else if (parse_reservation_space($6, &$$.reg_space)) ++ { ++ if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) ++ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $4); ++ } ++ else ++ { ++ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register or space reservation '%s'.", $6); ++ } ++ + vkd3d_free($4); ++ vkd3d_free($6); ++ } ++ | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' ++ { ++ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); + +- $$ = parse_reg_reservation($6); ++ memset(&$$, 0, sizeof($$)); ++ if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) ++ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $6); ++ ++ if (!parse_reservation_space($8, &$$.reg_space)) ++ hlsl_error(ctx, &@8, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register space reservation '%s'.", $8); ++ ++ vkd3d_free($4); + vkd3d_free($6); ++ vkd3d_free($8); + } + +-packoffset_opt: ++packoffset_reservation: + ':' KW_PACKOFFSET '(' any_identifier ')' + { + $$ = parse_packoffset(ctx, $4, NULL, &@$); +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c 
b/libs/vkd3d/libs/vkd3d-shader/ir.c +index eca18f4eb28..1f8d60c62ac 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -636,11 +636,14 @@ static bool control_point_normaliser_is_in_control_point_phase(const struct cont + return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; + } + +-static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( ++struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( + struct vkd3d_shader_instruction_array *instructions) + { + struct vkd3d_shader_src_param *rel_addr; + ++ if (instructions->outpointid_param) ++ return instructions->outpointid_param; ++ + if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) + return NULL; + +@@ -648,6 +651,7 @@ static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( + rel_addr->swizzle = 0; + rel_addr->modifiers = 0; + ++ instructions->outpointid_param = rel_addr; + return rel_addr; + } + +@@ -3344,6 +3348,14 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program + enum vkd3d_result ret; + size_t i; + ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) ++ { ++ FIXME("Hull shaders are not supported.\n"); ++ vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "The structurizer does not support hull shaders."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ + memset(cfg, 0, sizeof(*cfg)); + cfg->message_context = message_context; + cfg->program = program; +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 1f4320968d3..4434e6e98f2 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -202,6 +202,7 @@ enum vkd3d_shader_error + VKD3D_SHADER_WARNING_DXIL_INVALID_MASK = 8307, + VKD3D_SHADER_WARNING_DXIL_INVALID_OPERATION = 8308, + 
VKD3D_SHADER_WARNING_DXIL_IGNORING_ATTACHMENT = 8309, ++ VKD3D_SHADER_WARNING_DXIL_UNDEFINED_OPERAND = 8310, + + VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, + VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER = 9001, +@@ -1300,6 +1301,8 @@ struct vkd3d_shader_instruction_array + struct vkd3d_shader_immediate_constant_buffer **icbs; + size_t icb_capacity; + size_t icb_count; ++ ++ struct vkd3d_shader_src_param *outpointid_param; + }; + + bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instructions, unsigned int reserve); +@@ -1310,6 +1313,8 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins + struct vkd3d_shader_immediate_constant_buffer *icb); + bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, + unsigned int dst, unsigned int src); ++struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( ++ struct vkd3d_shader_instruction_array *instructions); + void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); + + enum vkd3d_shader_config_flags +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index e0a7acb306d..5f60c8d90ad 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -24,10 +24,6 @@ + #define VK_NO_PROTOTYPES + #define CONST_VTABLE + +-#ifdef _WIN32 +-# define _WIN32_WINNT 0x0600 /* for condition variables */ +-#endif +- + #include "vkd3d_common.h" + #include "vkd3d_blob.h" + #include "vkd3d_memory.h" +@@ -205,36 +201,6 @@ union vkd3d_thread_handle + void *handle; + }; + +-struct vkd3d_cond +-{ +- CONDITION_VARIABLE cond; +-}; +- +-static inline void vkd3d_cond_init(struct vkd3d_cond *cond) +-{ +- InitializeConditionVariable(&cond->cond); +-} +- +-static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) +-{ +- WakeConditionVariable(&cond->cond); +-} +- +-static inline void vkd3d_cond_broadcast(struct vkd3d_cond 
*cond) +-{ +- WakeAllConditionVariable(&cond->cond); +-} +- +-static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) +-{ +- if (!SleepConditionVariableCS(&cond->cond, &lock->lock, INFINITE)) +- ERR("Could not sleep on the condition variable, error %lu.\n", GetLastError()); +-} +- +-static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) +-{ +-} +- + static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) + { + return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; +@@ -265,56 +231,6 @@ union vkd3d_thread_handle + void *handle; + }; + +-struct vkd3d_cond +-{ +- pthread_cond_t cond; +-}; +- +-static inline void vkd3d_cond_init(struct vkd3d_cond *cond) +-{ +- int ret; +- +- ret = pthread_cond_init(&cond->cond, NULL); +- if (ret) +- ERR("Could not initialize the condition variable, error %d.\n", ret); +-} +- +-static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) +-{ +- int ret; +- +- ret = pthread_cond_signal(&cond->cond); +- if (ret) +- ERR("Could not signal the condition variable, error %d.\n", ret); +-} +- +-static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) +-{ +- int ret; +- +- ret = pthread_cond_broadcast(&cond->cond); +- if (ret) +- ERR("Could not broadcast the condition variable, error %d.\n", ret); +-} +- +-static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) +-{ +- int ret; +- +- ret = pthread_cond_wait(&cond->cond, &lock->lock); +- if (ret) +- ERR("Could not wait on the condition variable, error %d.\n", ret); +-} +- +-static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) +-{ +- int ret; +- +- ret = pthread_cond_destroy(&cond->cond); +- if (ret) +- ERR("Could not destroy the condition variable, error %d.\n", ret); +-} +- + # if HAVE_SYNC_BOOL_COMPARE_AND_SWAP + static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) + { +-- 
+2.43.0 + diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-b1eaf8327bf59b516f80e232e86332473ed.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-b1eaf8327bf59b516f80e232e86332473ed.patch deleted file mode 100644 index c831b9fc..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-b1eaf8327bf59b516f80e232e86332473ed.patch +++ /dev/null @@ -1,1242 +0,0 @@ -From daa351361adc81f3fb7db6a71bbdaee7bb9d7cde Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 28 Mar 2024 10:39:27 +1100 -Subject: [PATCH] Updated vkd3d to b1eaf8327bf59b516f80e232e86332473ed97edc. - ---- - libs/vkd3d/include/private/vkd3d_common.h | 2 + - libs/vkd3d/include/private/vkd3d_memory.h | 9 + - libs/vkd3d/include/vkd3d_shader.h | 17 +- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 30 +++ - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 8 + - libs/vkd3d/libs/vkd3d-shader/dxil.c | 254 ++++++++++++++++++ - libs/vkd3d/libs/vkd3d-shader/fx.c | 68 ++++- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 40 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 12 +- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 55 +++- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 36 ++- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 121 +++------ - .../libs/vkd3d-shader/vkd3d_shader_private.h | 11 + - 14 files changed, 539 insertions(+), 125 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 974ff9446db..1cc8ecc38f3 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -75,6 +75,8 @@ - #define TAG_XNAP VKD3D_MAKE_TAG('X', 'N', 'A', 'P') - #define TAG_XNAS VKD3D_MAKE_TAG('X', 'N', 'A', 'S') - -+#define TAG_RD11_REVERSE 0x25441313 -+ - static inline uint64_t align(uint64_t addr, size_t alignment) - { - return (addr + (alignment - 1)) & ~(alignment - 1); -diff --git a/libs/vkd3d/include/private/vkd3d_memory.h b/libs/vkd3d/include/private/vkd3d_memory.h -index 
bb177e39add..682d35c03c6 100644 ---- a/libs/vkd3d/include/private/vkd3d_memory.h -+++ b/libs/vkd3d/include/private/vkd3d_memory.h -@@ -65,6 +65,15 @@ static inline char *vkd3d_strdup(const char *string) - return ptr; - } - -+static inline void *vkd3d_memdup(const void *mem, size_t size) -+{ -+ void *ptr; -+ -+ if ((ptr = vkd3d_malloc(size))) -+ memcpy(ptr, mem, size); -+ return ptr; -+} -+ - bool vkd3d_array_reserve(void **elements, size_t *capacity, size_t element_count, size_t element_size); - - #endif /* __VKD3D_MEMORY_H */ -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 83b90474af4..0ce2ef67b50 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -302,12 +302,25 @@ enum vkd3d_shader_compile_option_name - /** - * If \a value is non-zero compilation will produce a child effect using - * shared object descriptions, as instructed by the "shared" modifier. -- * Child effects are supported with fx_2_0, fx_4_0, and fx_4_1. This option -- * and "shared" modifiers are ignored for fx_5_0 profile, and non-fx profiles. -+ * Child effects are supported with fx_4_0, and fx_4_1 profiles. This option -+ * and "shared" modifiers are ignored for the fx_5_0 profile and non-fx profiles. -+ * The fx_2_0 profile does not have a separate concept of child effects, variables -+ * marked with "shared" modifier will be marked as such in a binary. - * - * \since 1.12 - */ - VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT = 0x0000000b, -+ /** -+ * If \a value is nonzero, emit a compile warning warn when vectors or -+ * matrices are truncated in an implicit conversion. -+ * If warnings are disabled, this option has no effect. -+ * This option has no effects for targets other than HLSL. -+ * -+ * The default value is nonzero, i.e. enable implicit truncation warnings. 
-+ * -+ * \since 1.12 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION = 0x0000000c, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), - }; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 0623a129eae..2b5feb94103 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -448,6 +448,23 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, - vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags); - } - -+static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags) -+{ -+ if (atomic_flags & VKD3DARF_SEQ_CST) -+ { -+ vkd3d_string_buffer_printf(&compiler->buffer, "_seqCst"); -+ atomic_flags &= ~VKD3DARF_SEQ_CST; -+ } -+ if (atomic_flags & VKD3DARF_VOLATILE) -+ { -+ vkd3d_string_buffer_printf(&compiler->buffer, "_volatile"); -+ atomic_flags &= ~VKD3DARF_VOLATILE; -+ } -+ -+ if (atomic_flags) -+ vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", atomic_flags); -+} -+ - static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags) - { - if (sync_flags & VKD3DSSF_GLOBAL_UAV) -@@ -1734,6 +1751,19 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - } - break; - -+ case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: -+ case VKD3DSIH_IMM_ATOMIC_IADD: -+ case VKD3DSIH_IMM_ATOMIC_AND: -+ case VKD3DSIH_IMM_ATOMIC_IMAX: -+ case VKD3DSIH_IMM_ATOMIC_IMIN: -+ case VKD3DSIH_IMM_ATOMIC_OR: -+ case VKD3DSIH_IMM_ATOMIC_UMAX: -+ case VKD3DSIH_IMM_ATOMIC_UMIN: -+ case VKD3DSIH_IMM_ATOMIC_EXCH: -+ case VKD3DSIH_IMM_ATOMIC_XOR: -+ shader_dump_atomic_op_flags(compiler, ins->flags); -+ break; -+ - case VKD3DSIH_SYNC: - shader_dump_sync_flags(compiler, ins->flags); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 3b935b07d61..099729fbb6c 
100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -2333,6 +2333,14 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - break; - -+ case HLSL_OP2_LOGIC_AND: -+ write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); -+ break; -+ -+ case HLSL_OP2_LOGIC_OR: -+ write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -+ break; -+ - case HLSL_OP2_SLT: - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 7f9a74fa737..0787ee13930 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -401,10 +401,15 @@ enum dx_intrinsic_opcode - DX_ATOMIC_BINOP = 78, - DX_ATOMIC_CMP_XCHG = 79, - DX_BARRIER = 80, -+ DX_DISCARD = 82, - DX_DERIV_COARSEX = 83, - DX_DERIV_COARSEY = 84, - DX_DERIV_FINEX = 85, - DX_DERIV_FINEY = 86, -+ DX_THREAD_ID = 93, -+ DX_GROUP_ID = 94, -+ DX_THREAD_ID_IN_GROUP = 95, -+ DX_FLATTENED_THREAD_ID_IN_GROUP = 96, - DX_SPLIT_DOUBLE = 102, - DX_LEGACY_F32TOF16 = 130, - DX_LEGACY_F16TOF32 = 131, -@@ -459,6 +464,32 @@ enum dxil_predicate - ICMP_SLE = 41, - }; - -+enum dxil_rmw_code -+{ -+ RMW_XCHG = 0, -+ RMW_ADD = 1, -+ RMW_SUB = 2, -+ RMW_AND = 3, -+ RMW_NAND = 4, -+ RMW_OR = 5, -+ RMW_XOR = 6, -+ RMW_MAX = 7, -+ RMW_MIN = 8, -+ RMW_UMAX = 9, -+ RMW_UMIN = 10, -+}; -+ -+enum dxil_atomic_ordering -+{ -+ ORDERING_NOTATOMIC = 0, -+ ORDERING_UNORDERED = 1, -+ ORDERING_MONOTONIC = 2, -+ ORDERING_ACQUIRE = 3, -+ ORDERING_RELEASE = 4, -+ ORDERING_ACQREL = 5, -+ ORDERING_SEQCST = 6, -+}; -+ - enum dxil_atomic_binop_code - { - ATOMIC_BINOP_ADD, -@@ -758,6 +789,7 @@ struct sm6_parser - - struct vkd3d_shader_dst_param *output_params; - struct vkd3d_shader_dst_param *input_params; -+ uint32_t input_regs_declared[(VKD3DSPR_COUNT + 
0x1f) / 0x20]; - - struct sm6_function *functions; - size_t function_count; -@@ -2588,6 +2620,18 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6 - return true; - } - -+static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, struct sm6_parser *sm6) -+{ -+ if (!sm6_type_is_pointer(value->type) || !sm6_type_is_i32(value->type->u.pointer.type)) -+ { -+ WARN("Operand result type %u is not a pointer to i32.\n", value->type->class); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "An int32 pointer operand passed to a DXIL instruction is not an int32 pointer."); -+ return false; -+ } -+ return true; -+} -+ - static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) - { - if (idx < sm6->value_count) -@@ -2960,6 +3004,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - - default: - FIXME("Unhandled constant code %u.\n", record->code); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Constant code %u is unhandled.", record->code); - dst->u.reg.type = VKD3DSPR_UNDEF; - break; - } -@@ -3500,6 +3546,9 @@ struct function_emission_state - unsigned int temp_idx; - }; - -+static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, -+ unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg); -+ - static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record, - struct vkd3d_shader_instruction *ins, struct sm6_value *dst) - { -@@ -3575,6 +3624,129 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec - sm6_parser_declare_indexable_temp(sm6, elem_type, type[0]->u.array.count, alignment, true, 0, ins, dst); - } - -+static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code) -+{ -+ switch (code) -+ { -+ case 
RMW_ADD: -+ return VKD3DSIH_IMM_ATOMIC_IADD; -+ case RMW_AND: -+ return VKD3DSIH_IMM_ATOMIC_AND; -+ case RMW_MAX: -+ return VKD3DSIH_IMM_ATOMIC_IMAX; -+ case RMW_MIN: -+ return VKD3DSIH_IMM_ATOMIC_IMIN; -+ case RMW_OR: -+ return VKD3DSIH_IMM_ATOMIC_OR; -+ case RMW_UMAX: -+ return VKD3DSIH_IMM_ATOMIC_UMAX; -+ case RMW_UMIN: -+ return VKD3DSIH_IMM_ATOMIC_UMIN; -+ case RMW_XCHG: -+ return VKD3DSIH_IMM_ATOMIC_EXCH; -+ case RMW_XOR: -+ return VKD3DSIH_IMM_ATOMIC_XOR; -+ default: -+ /* DXIL currently doesn't use SUB and NAND. */ -+ return VKD3DSIH_INVALID; -+ } -+} -+ -+static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record, -+ struct function_emission_state *state, struct sm6_value *dst) -+{ -+ struct vkd3d_shader_register coord, const_offset, const_zero; -+ const struct vkd3d_shader_register *regs[2]; -+ struct vkd3d_shader_dst_param *dst_params; -+ struct vkd3d_shader_src_param *src_params; -+ struct vkd3d_shader_instruction *ins; -+ const struct sm6_value *ptr, *src; -+ enum vkd3d_shader_opcode op; -+ unsigned int i = 0; -+ bool is_volatile; -+ uint64_t code; -+ -+ if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) -+ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) -+ return; -+ -+ if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) -+ { -+ WARN("Register is not groupshared.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "The destination register for an atomicrmw instruction is not groupshared memory."); -+ return; -+ } -+ -+ dst->type = ptr->type->u.pointer.type; -+ -+ if (!(src = sm6_parser_get_value_by_ref(sm6, record, dst->type, &i))) -+ return; -+ -+ if (!dxil_record_validate_operand_count(record, i + 4, i + 4, sm6)) -+ return; -+ -+ if ((op = map_dx_atomicrmw_op(code = record->operands[i++])) == VKD3DSIH_INVALID) -+ { -+ FIXME("Unhandled atomicrmw op %"PRIu64".\n", code); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Operation 
%"PRIu64" for an atomicrmw instruction is unhandled.", code); -+ return; -+ } -+ -+ is_volatile = record->operands[i++]; -+ -+ /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ -+ if ((code = record->operands[i++]) != ORDERING_SEQCST) -+ FIXME("Unhandled atomic ordering %"PRIu64".\n", code); -+ -+ if ((code = record->operands[i]) != 1) -+ WARN("Ignoring synchronisation scope %"PRIu64".\n", code); -+ -+ if (ptr->structure_stride) -+ { -+ if (ptr->u.reg.idx[1].rel_addr) -+ { -+ regs[0] = &ptr->u.reg.idx[1].rel_addr->reg; -+ } -+ else -+ { -+ register_make_constant_uint(&const_offset, ptr->u.reg.idx[1].offset); -+ regs[0] = &const_offset; -+ } -+ register_make_constant_uint(&const_zero, 0); -+ regs[1] = &const_zero; -+ if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord)) -+ return; -+ } -+ -+ ins = state->ins; -+ vsir_instruction_init(ins, &sm6->p.location, op); -+ ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; -+ -+ if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) -+ return; -+ if (ptr->structure_stride) -+ src_param_init_vector_from_reg(&src_params[0], &coord); -+ else -+ src_param_make_constant_uint(&src_params[0], 0); -+ src_param_init_from_value(&src_params[1], src); -+ -+ dst_params = instruction_dst_params_alloc(ins, 2, sm6); -+ register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); -+ dst_param_init(&dst_params[0]); -+ -+ dst_params[1].reg = ptr->u.reg; -+ /* The groupshared register has data type UAV when accessed. 
*/ -+ dst_params[1].reg.data_type = VKD3D_DATA_UAV; -+ dst_params[1].reg.idx[1].rel_addr = NULL; -+ dst_params[1].reg.idx[1].offset = ~0u; -+ dst_params[1].reg.idx_count = 1; -+ dst_param_init(&dst_params[1]); -+ -+ dst->u.reg = dst_params[0].reg; -+} -+ - static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, - const struct sm6_type *type_b, struct sm6_parser *sm6) - { -@@ -4212,6 +4384,22 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr - instruction_dst_param_init_ssa_vector(ins, sm6_type_max_vector_size(type), sm6); - } - -+static void sm6_parser_dcl_register_builtin(struct sm6_parser *sm6, -+ enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type, unsigned int component_count) -+{ -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!bitmap_is_set(sm6->input_regs_declared, reg_type)) -+ { -+ bitmap_set(sm6->input_regs_declared, reg_type); -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_INPUT); -+ dst_param = &ins->declaration.dst; -+ vsir_register_init(&dst_param->reg, reg_type, data_type, 0); -+ dst_param_init_vector(dst_param, component_count); -+ } -+} -+ - static const struct sm6_descriptor_info *sm6_parser_get_descriptor(struct sm6_parser *sm6, - enum vkd3d_shader_descriptor_type type, unsigned int id, const struct sm6_value *address) - { -@@ -4273,6 +4461,18 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int - ins->handler_idx = VKD3DSIH_NOP; - } - -+static void sm6_parser_emit_dx_discard(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_DISCARD); -+ -+ if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ 
src_param_init_from_value(src_param, operands[0]); -+} -+ - static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -4288,6 +4488,48 @@ static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_op - instruction_dst_param_init_ssa_scalar(ins, sm6); - } - -+static void sm6_parser_emit_dx_compute_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ unsigned int component_count = 3, component_idx = 0; -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ enum vkd3d_shader_register_type reg_type; -+ -+ switch (op) -+ { -+ case DX_THREAD_ID: -+ reg_type = VKD3DSPR_THREADID; -+ break; -+ case DX_GROUP_ID: -+ reg_type = VKD3DSPR_THREADGROUPID; -+ break; -+ case DX_THREAD_ID_IN_GROUP: -+ reg_type = VKD3DSPR_LOCALTHREADID; -+ break; -+ case DX_FLATTENED_THREAD_ID_IN_GROUP: -+ reg_type = VKD3DSPR_LOCALTHREADINDEX; -+ component_count = 1; -+ break; -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ sm6_parser_dcl_register_builtin(sm6, reg_type, VKD3D_DATA_UINT, component_count); -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ vsir_register_init(&src_param->reg, reg_type, VKD3D_DATA_UINT, 0); -+ if (component_count > 1) -+ { -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ component_idx = sm6_value_get_constant_uint(operands[0]); -+ } -+ src_param_init_scalar(src_param, component_idx); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static enum vkd3d_shader_opcode sm6_dx_map_ma_op(enum dx_intrinsic_opcode op, const struct sm6_type *type) - { - switch (op) -@@ -5086,17 +5328,20 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_DERIV_COARSEY ] = {"e", "R", 
sm6_parser_emit_dx_unary}, - [DX_DERIV_FINEX ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DERIV_FINEY ] = {"e", "R", sm6_parser_emit_dx_unary}, -+ [DX_DISCARD ] = {"v", "1", sm6_parser_emit_dx_discard}, - [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, - [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, - [DX_FIRST_BIT_LO ] = {"i", "m", sm6_parser_emit_dx_unary}, - [DX_FIRST_BIT_SHI ] = {"i", "m", sm6_parser_emit_dx_unary}, -+ [DX_FLATTENED_THREAD_ID_IN_GROUP ] = {"i", "", sm6_parser_emit_dx_compute_builtin}, - [DX_FMA ] = {"g", "RRR", sm6_parser_emit_dx_ma}, - [DX_FMAD ] = {"g", "RRR", sm6_parser_emit_dx_ma}, - [DX_FMAX ] = {"g", "RR", sm6_parser_emit_dx_binary}, - [DX_FMIN ] = {"g", "RR", sm6_parser_emit_dx_binary}, - [DX_FRC ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_GET_DIMENSIONS ] = {"D", "Hi", sm6_parser_emit_dx_get_dimensions}, -+ [DX_GROUP_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, - [DX_IBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, - [DX_HCOS ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_HSIN ] = {"g", "R", sm6_parser_emit_dx_unary}, -@@ -5134,6 +5379,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", sm6_parser_emit_dx_texture_gather}, - [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, - [DX_TEXTURE_STORE ] = {"v", "Hiiiooooc", sm6_parser_emit_dx_texture_store}, -+ [DX_THREAD_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, -+ [DX_THREAD_ID_IN_GROUP ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, - [DX_UBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, - [DX_UMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, - [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, -@@ -6607,6 +6854,13 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - case FUNC_CODE_INST_ALLOCA: - sm6_parser_emit_alloca(sm6, record, ins, dst); - break; -+ case 
FUNC_CODE_INST_ATOMICRMW: -+ { -+ struct function_emission_state state = {code_block, ins}; -+ sm6_parser_emit_atomicrmw(sm6, record, &state, dst); -+ sm6->p.program.temp_count = max(sm6->p.program.temp_count, state.temp_idx); -+ break; -+ } - case FUNC_CODE_INST_BINOP: - sm6_parser_emit_binop(sm6, record, ins, dst); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index f2be00da33a..9424a5685a7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -611,11 +611,15 @@ static uint32_t write_fx_2_string(const char *string, struct fx_write_context *f - { - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; - const char *s = string ? string : ""; -+ static const char tail[3]; - uint32_t size, offset; - - size = strlen(s) + 1; - offset = put_u32(buffer, size); - bytecode_put_bytes(buffer, s, size); -+ size %= 4; -+ if (size) -+ bytecode_put_bytes_unaligned(buffer, tail, 4 - size); - return offset; - } - -@@ -639,10 +643,13 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - - switch (type->base_type) - { -+ case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: - case HLSL_TYPE_VOID: -+ case HLSL_TYPE_TEXTURE: - break; - default: - hlsl_fixme(ctx, &ctx->location, "Writing parameter type %u is not implemented.", -@@ -766,21 +773,72 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f - return offset; - } - -+static bool is_type_supported_fx_2(const struct hlsl_type *type) -+{ -+ type = hlsl_get_multiarray_element_type(type); -+ -+ if (type->class == HLSL_CLASS_STRUCT) -+ return true; -+ -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_DOUBLE: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_PIXELSHADER: -+ case HLSL_TYPE_VERTEXSHADER: -+ case HLSL_TYPE_STRING: -+ return 
true; -+ case HLSL_TYPE_TEXTURE: -+ case HLSL_TYPE_SAMPLER: -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ case HLSL_SAMPLER_DIM_2D: -+ case HLSL_SAMPLER_DIM_3D: -+ case HLSL_SAMPLER_DIM_CUBE: -+ case HLSL_SAMPLER_DIM_GENERIC: -+ return true; -+ default: -+ ; -+ } -+ break; -+ default: -+ return false; -+ } -+ -+ return false; -+} -+ - static void write_fx_2_parameters(struct fx_write_context *fx) - { - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -- uint32_t desc_offset, value_offset; -+ uint32_t desc_offset, value_offset, flags; - struct hlsl_ctx *ctx = fx->ctx; - struct hlsl_ir_var *var; -+ enum fx_2_parameter_flags -+ { -+ IS_SHARED = 0x1, -+ }; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -+ if (!is_type_supported_fx_2(var->data_type)) -+ continue; -+ - desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); - value_offset = write_fx_2_initial_value(var, fx); - -+ flags = 0; -+ if (var->storage_modifiers & HLSL_STORAGE_SHARED) -+ flags |= IS_SHARED; -+ - put_u32(buffer, desc_offset); /* Parameter description */ - put_u32(buffer, value_offset); /* Value */ -- put_u32(buffer, 0); /* Flags */ -+ put_u32(buffer, flags); /* Flags */ - - put_u32(buffer, 0); /* Annotations count */ - if (has_annotations(var)) -@@ -799,12 +857,13 @@ static const struct fx_write_context_ops fx_2_ops = - - static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - { -- uint32_t offset, size, technique_count, parameter_count; -+ uint32_t offset, size, technique_count, parameter_count, object_count; - struct vkd3d_bytecode_buffer buffer = { 0 }; - struct vkd3d_bytecode_buffer *structured; - struct fx_write_context fx; - - fx_write_context_init(ctx, &fx_2_ops, &fx); -+ fx.object_variable_count = 1; - structured = &fx.structured; - - /* First entry is always zeroed and skipped. 
*/ -@@ -816,10 +875,11 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - parameter_count = put_u32(structured, 0); /* Parameter count */ - technique_count = put_u32(structured, 0); - put_u32(structured, 0); /* Unknown */ -- put_u32(structured, 0); /* Object count */ -+ object_count = put_u32(structured, 0); - - write_fx_2_parameters(&fx); - set_u32(structured, parameter_count, fx.parameter_count); -+ set_u32(structured, object_count, fx.object_variable_count); - - write_techniques(ctx->globals, &fx); - set_u32(structured, technique_count, fx.technique_count); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index a82334e58fd..cba954c988f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -785,6 +785,7 @@ static const char * get_case_insensitive_typename(const char *name) - "float", - "matrix", - "pixelshader", -+ "texture", - "vector", - "vertexshader", - }; -@@ -3408,7 +3409,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - {"fxgroup", HLSL_CLASS_OBJECT, HLSL_TYPE_EFFECT_GROUP, 1, 1}, - {"pass", HLSL_CLASS_OBJECT, HLSL_TYPE_PASS, 1, 1}, - {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, -- {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, -+ {"texture", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, - {"pixelshader", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, - {"vertexshader", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, - {"RenderTargetView",HLSL_CLASS_OBJECT, HLSL_TYPE_RENDERTARGETVIEW, 1, 1}, -@@ -3592,24 +3593,35 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil - return false; - ctx->cur_buffer = ctx->globals_buffer; - -+ ctx->warn_implicit_truncation = true; -+ - for (i = 0; i < compile_info->option_count; ++i) - { - const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; - -- if (option->name == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER) -- 
{ -- if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) -- ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; -- else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) -- ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; -- } -- else if (option->name == VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY) -- { -- ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; -- } -- else if (option->name == VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT) -+ switch (option->name) - { -- ctx->child_effect = !!option->value; -+ case VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER: -+ if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) -+ ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; -+ else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) -+ ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; -+ break; -+ -+ case VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY: -+ ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; -+ break; -+ -+ case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT: -+ ctx->child_effect = option->value; -+ break; -+ -+ case VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION: -+ ctx->warn_implicit_truncation = option->value; -+ break; -+ -+ default: -+ break; - } - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 561782efbf8..c6321f2ead2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -925,6 +925,7 @@ struct hlsl_ctx - - bool semantic_compat_mapping; - bool child_effect; -+ bool warn_implicit_truncation; - }; - - struct hlsl_resource_load_params -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index ec8b3d22af2..52c2176542c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -413,7 +413,7 @@ 
static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct - return NULL; - } - -- if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy) -+ if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy && ctx->warn_implicit_truncation) - hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", - src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); - -@@ -438,8 +438,9 @@ static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t - - static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) - { -- struct hlsl_ir_node *condition, *not, *iff, *jump; -+ struct hlsl_ir_node *condition, *cast, *not, *iff, *jump; - struct hlsl_block then_block; -+ struct hlsl_type *bool_type; - - /* E.g. "for (i = 0; ; ++i)". */ - if (list_empty(&cond_block->instrs)) -@@ -449,7 +450,12 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co - - check_condition_type(ctx, condition); - -- if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) -+ bool_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); -+ if (!(cast = hlsl_new_cast(ctx, condition, bool_type, &condition->loc))) -+ return false; -+ hlsl_block_add_instr(cond_block, cast); -+ -+ if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, cast, &condition->loc))) - return false; - hlsl_block_add_instr(cond_block, not); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index ff349ab49ef..5c09ce04f5b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -2902,6 +2902,55 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct - return true; - } - -+static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node 
*operands[HLSL_MAX_OPERANDS]; -+ struct hlsl_ir_node *arg, *arg_cast, *neg, *one, *sub, *res; -+ struct hlsl_constant_value one_value; -+ struct hlsl_type *float_type; -+ struct hlsl_ir_expr *expr; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP1_LOGIC_NOT) -+ return false; -+ -+ arg = expr->operands[0].node; -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); -+ -+ /* If this is happens, it means we failed to cast the argument to boolean somewhere. */ -+ assert(arg->data_type->base_type == HLSL_TYPE_BOOL); -+ -+ if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &arg->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg_cast); -+ -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg_cast, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg); -+ -+ one_value.u[0].f = 1.0; -+ one_value.u[1].f = 1.0; -+ one_value.u[2].f = 1.0; -+ one_value.u[3].f = 1.0; -+ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, one); -+ -+ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg))) -+ return false; -+ hlsl_block_add_instr(block, sub); -+ -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = sub; -+ if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, res); -+ -+ return true; -+} -+ - /* Use movc/cmp for the ternary operator. 
*/ - static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -@@ -3573,6 +3622,8 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - case HLSL_OP1_NEG: - case HLSL_OP2_ADD: - case HLSL_OP2_DIV: -+ case HLSL_OP2_LOGIC_AND: -+ case HLSL_OP2_LOGIC_OR: - case HLSL_OP2_MAX: - case HLSL_OP2_MIN: - case HLSL_OP2_MUL: -@@ -3761,9 +3812,6 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - { - unsigned int r; - -- if (!hlsl_type_is_resource(var->data_type)) -- continue; -- - if (var->reg_reservation.reg_type) - { - for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) -@@ -5420,6 +5468,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_ceil, body); - lower_ir(ctx, lower_floor, body); - lower_ir(ctx, lower_comparison_operators, body); -+ lower_ir(ctx, lower_logic_not, body); - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - lower_ir(ctx, lower_slt, body); - else -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 673400efd69..1cae2d7d9d4 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -3396,7 +3396,7 @@ struct vkd3d_shader_register_info - bool is_aggregate; - }; - --static bool spirv_compiler_get_register_info(const struct spirv_compiler *compiler, -+static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_info *register_info) - { - struct vkd3d_symbol reg_symbol, *symbol; -@@ -3422,7 +3422,8 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil - vkd3d_symbol_make_register(®_symbol, reg); - if (!(entry = rb_get(&compiler->symbol_table, ®_symbol))) - { -- FIXME("Unrecognized register (%s).\n", debug_vkd3d_symbol(®_symbol)); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE, -+ 
"Unrecognized register (%s).\n", debug_vkd3d_symbol(®_symbol)); - memset(register_info, 0, sizeof(*register_info)); - return false; - } -@@ -4094,7 +4095,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - struct vkd3d_shader_register_info reg_info; - unsigned int component_count; - uint32_t type_id, val_id; -- uint32_t write_mask32; -+ uint32_t val_write_mask; - - if (reg->type == VKD3DSPR_IMMCONST) - return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); -@@ -4114,17 +4115,17 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - return vkd3d_spirv_get_op_undef(builder, type_id); - } -- assert(reg_info.component_type != VKD3D_SHADER_COMPONENT_DOUBLE); - spirv_compiler_emit_dereference_register(compiler, reg, ®_info); - -- write_mask32 = data_type_is_64_bit(reg->data_type) ? vsir_write_mask_32_from_64(write_mask) : write_mask; -+ val_write_mask = (data_type_is_64_bit(reg->data_type) && !component_type_is_64_bit(reg_info.component_type)) -+ ? vsir_write_mask_32_from_64(write_mask) : write_mask; - - /* Intermediate value (no storage class). */ - if (reg_info.storage_class == SpvStorageClassMax) - { - val_id = reg_info.id; - } -- else if (vsir_write_mask_component_count(write_mask32) == 1) -+ else if (vsir_write_mask_component_count(val_write_mask) == 1) - { - return spirv_compiler_emit_load_scalar(compiler, reg, swizzle, write_mask, ®_info); - } -@@ -4137,7 +4138,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - - swizzle = data_type_is_64_bit(reg->data_type) ? 
vsir_swizzle_32_from_64(swizzle) : swizzle; - val_id = spirv_compiler_emit_swizzle(compiler, -- val_id, reg_info.write_mask, reg_info.component_type, swizzle, write_mask32); -+ val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask); - - if (component_type != reg_info.component_type) - { -@@ -7123,6 +7124,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( - } - glsl_insts[] = - { -+ {VKD3DSIH_ABS, GLSLstd450FAbs}, - {VKD3DSIH_ACOS, GLSLstd450Acos}, - {VKD3DSIH_ASIN, GLSLstd450Asin}, - {VKD3DSIH_ATAN, GLSLstd450Atan}, -@@ -8005,8 +8007,9 @@ static void spirv_compiler_emit_discard(struct spirv_compiler *compiler, - * a mismatch between the VSIR structure and the SPIR-V one, which would cause problems if - * structurisation is necessary. Therefore we emit it as a function call. */ - condition_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); -- condition_id = spirv_compiler_emit_int_to_bool(compiler, -- instruction->flags, src->reg.data_type, 1, condition_id); -+ if (src->reg.data_type != VKD3D_DATA_BOOL) -+ condition_id = spirv_compiler_emit_int_to_bool(compiler, -+ instruction->flags, src->reg.data_type, 1, condition_id); - void_id = vkd3d_spirv_get_op_type_void(builder); - vkd3d_spirv_build_op_function_call(builder, void_id, spirv_compiler_get_discard_function_id(compiler), - &condition_id, 1); -@@ -8785,7 +8788,6 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, - ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); - constituents[j++] = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); - } -- assert(dst->reg.data_type == VKD3D_DATA_UINT); - spirv_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); - } - -@@ -8907,7 +8909,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, - type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, 
&src[1], VKD3DSP_WRITEMASK_0); - - data = &src[instruction->src_count - 1]; -- assert(data->reg.data_type == VKD3D_DATA_UINT); - val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); - - component_count = vsir_write_mask_component_count(dst->write_mask); -@@ -9159,6 +9160,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - const struct vkd3d_shader_dst_param *resource; - uint32_t coordinate_id, sample_id, pointer_id; - struct vkd3d_shader_register_info reg_info; -+ SpvMemorySemanticsMask memory_semantic; - struct vkd3d_shader_image image; - unsigned int structure_stride; - uint32_t coordinate_mask; -@@ -9250,12 +9252,19 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - - val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type); - -+ if (instruction->flags & VKD3DARF_VOLATILE) -+ WARN("Ignoring 'volatile' attribute.\n"); -+ -+ memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) -+ ? 
SpvMemorySemanticsSequentiallyConsistentMask -+ : SpvMemorySemanticsMaskNone; -+ - operands[i++] = pointer_id; - operands[i++] = spirv_compiler_get_constant_uint(compiler, scope); -- operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); -+ operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); - if (instruction->src_count >= 3) - { -- operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); -+ operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); - operands[i++] = spirv_compiler_emit_load_src_with_type(compiler, &src[2], VKD3DSP_WRITEMASK_0, component_type); - } - operands[i++] = val_id; -@@ -9808,6 +9817,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_ISFINITE: - spirv_compiler_emit_isfinite(compiler, instruction); - break; -+ case VKD3DSIH_ABS: - case VKD3DSIH_ACOS: - case VKD3DSIH_ASIN: - case VKD3DSIH_ATAN: -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index cb4f6d4ddbf..4d0658313d5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -2994,26 +2994,23 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - { - switch (type->class) - { -- case HLSL_CLASS_ARRAY: -- return sm4_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3D_SVC_MATRIX_COLUMNS; - else - return D3D_SVC_MATRIX_ROWS; -- case HLSL_CLASS_OBJECT: -- return D3D_SVC_OBJECT; - case HLSL_CLASS_SCALAR: - return D3D_SVC_SCALAR; -- case HLSL_CLASS_STRUCT: -- return D3D_SVC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3D_SVC_VECTOR; -- default: -- ERR("Invalid class %#x.\n", type->class); -- vkd3d_unreachable(); -+ -+ case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_STRUCT: -+ case HLSL_CLASS_OBJECT: -+ break; - } -+ 
vkd3d_unreachable(); - } - - static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) -@@ -3029,68 +3026,8 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) - return D3D_SVT_FLOAT; - case HLSL_TYPE_INT: - return D3D_SVT_INT; -- case HLSL_TYPE_PIXELSHADER: -- return D3D_SVT_PIXELSHADER; -- case HLSL_TYPE_SAMPLER: -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3D_SVT_SAMPLER1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3D_SVT_SAMPLER2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3D_SVT_SAMPLER3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return D3D_SVT_SAMPLERCUBE; -- case HLSL_SAMPLER_DIM_GENERIC: -- return D3D_SVT_SAMPLER; -- default: -- vkd3d_unreachable(); -- } -- break; -- case HLSL_TYPE_STRING: -- return D3D_SVT_STRING; -- case HLSL_TYPE_TEXTURE: -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3D_SVT_TEXTURE1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3D_SVT_TEXTURE2D; -- case HLSL_SAMPLER_DIM_2DMS: -- return D3D_SVT_TEXTURE2DMS; -- case HLSL_SAMPLER_DIM_3D: -- return D3D_SVT_TEXTURE3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return D3D_SVT_TEXTURECUBE; -- case HLSL_SAMPLER_DIM_GENERIC: -- return D3D_SVT_TEXTURE; -- default: -- vkd3d_unreachable(); -- } -- break; - case HLSL_TYPE_UINT: - return D3D_SVT_UINT; -- case HLSL_TYPE_VERTEXSHADER: -- return D3D_SVT_VERTEXSHADER; -- case HLSL_TYPE_VOID: -- return D3D_SVT_VOID; -- case HLSL_TYPE_UAV: -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3D_SVT_RWTEXTURE1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3D_SVT_RWTEXTURE2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3D_SVT_RWTEXTURE3D; -- case HLSL_SAMPLER_DIM_1DARRAY: -- return D3D_SVT_RWTEXTURE1DARRAY; -- case HLSL_SAMPLER_DIM_2DARRAY: -- return D3D_SVT_RWTEXTURE2DARRAY; -- default: -- vkd3d_unreachable(); -- } - default: - vkd3d_unreachable(); - } -@@ -3101,8 +3038,8 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - const 
struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); - const char *name = array_type->name ? array_type->name : ""; - const struct hlsl_profile_info *profile = ctx->profile; -- unsigned int field_count = 0, array_size = 0; -- size_t fields_offset = 0, name_offset = 0; -+ unsigned int array_size = 0; -+ size_t name_offset = 0; - size_t i; - - if (type->bytecode_offset) -@@ -3116,32 +3053,47 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - - if (array_type->class == HLSL_CLASS_STRUCT) - { -- field_count = array_type->e.record.field_count; -+ unsigned int field_count = 0; -+ size_t fields_offset = 0; - -- for (i = 0; i < field_count; ++i) -+ for (i = 0; i < array_type->e.record.field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - -+ if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) -+ continue; -+ - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm4_type(ctx, buffer, field->type); -+ ++field_count; - } - - fields_offset = bytecode_align(buffer); - -- for (i = 0; i < field_count; ++i) -+ for (i = 0; i < array_type->e.record.field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - -+ if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) -+ continue; -+ - put_u32(buffer, field->name_bytecode_offset); - put_u32(buffer, field->type->bytecode_offset); - put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); - } -+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); -+ put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); -+ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -+ put_u32(buffer, fields_offset); -+ } -+ else -+ { -+ assert(array_type->class <= HLSL_CLASS_LAST_NUMERIC); -+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); -+ put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx)); -+ 
put_u32(buffer, vkd3d_make_u32(array_size, 0)); -+ put_u32(buffer, 1); - } -- -- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); -- put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); -- put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -- put_u32(buffer, fields_offset); - - if (profile->major_version >= 5) - { -@@ -3333,7 +3285,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - - extern_resources[*count].name = name; - extern_resources[*count].data_type = component_type; -- extern_resources[*count].is_user_packed = false; -+ extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; - - extern_resources[*count].regset = regset; - extern_resources[*count].id = var->regs[regset].id + regset_offset; -@@ -3528,8 +3480,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_uniform && var->buffer == cbuffer -- && var->data_type->class != HLSL_CLASS_OBJECT) -+ if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) - ++var_count; - } - -@@ -3563,8 +3514,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_uniform && var->buffer == cbuffer -- && var->data_type->class != HLSL_CLASS_OBJECT) -+ if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) - { - uint32_t flags = 0; - -@@ -3591,8 +3541,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - j = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_uniform && var->buffer == cbuffer -- && var->data_type->class != HLSL_CLASS_OBJECT) -+ if (var->is_uniform && var->buffer == cbuffer && 
var->data_type->reg_size[HLSL_REGSET_NUMERIC]) - { - const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6); - size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index a33b6d2d967..6d442cd517d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -755,6 +755,12 @@ enum vkd3d_shader_uav_flags - VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100, - }; - -+enum vkd3d_shader_atomic_rmw_flags -+{ -+ VKD3DARF_SEQ_CST = 0x1, -+ VKD3DARF_VOLATILE = 0x2, -+}; -+ - enum vkd3d_tessellator_domain - { - VKD3D_TESSELLATOR_DOMAIN_LINE = 1, -@@ -1611,6 +1617,11 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resourc - } - } - -+static inline bool component_type_is_64_bit(enum vkd3d_shader_component_type component_type) -+{ -+ return component_type == VKD3D_SHADER_COMPONENT_DOUBLE || component_type == VKD3D_SHADER_COMPONENT_UINT64; -+} -+ - enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, - unsigned int index); - --- -2.43.0 - diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-4b0a328a2b58a86e3529ddcc2cdc785a086.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-4b0a328a2b58a86e3529ddcc2cdc785a086.patch deleted file mode 100644 index 672860d9..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-4b0a328a2b58a86e3529ddcc2cdc785a086.patch +++ /dev/null @@ -1,815 +0,0 @@ -From 5a6ba406d2b64084bfb8180843d58fe0e8baf63d Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 4 Apr 2024 09:47:35 +1100 -Subject: [PATCH] Updated vkd3d to 4b0a328a2b58a86e3529ddcc2cdc785a08625f81. 
- ---- - libs/vkd3d/include/vkd3d_shader.h | 9 + - libs/vkd3d/libs/vkd3d-shader/dxil.c | 76 ++++++ - libs/vkd3d/libs/vkd3d-shader/fx.c | 9 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 12 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 7 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 71 +++--- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 25 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 248 ++++++++++---------- - 9 files changed, 265 insertions(+), 193 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 0ce2ef67b50..5cc36e186e2 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -21,6 +21,7 @@ - - #include - #include -+#include - #include - - #ifdef __cplusplus -@@ -321,6 +322,14 @@ enum vkd3d_shader_compile_option_name - * \since 1.12 - */ - VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION = 0x0000000c, -+ /** -+ * If \a value is nonzero, empty constant buffers descriptions are -+ * written out in the output effect binary. This option applies only -+ * to fx_4_0 and fx_4_1 profiles and is otherwise ignored. 
-+ * -+ * \since 1.12 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS = 0x0000000d, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), - }; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 0787ee13930..b5a61d99d3f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -382,6 +382,9 @@ enum dx_intrinsic_opcode - DX_UMAD = 49, - DX_IBFE = 51, - DX_UBFE = 52, -+ DX_DOT2 = 54, -+ DX_DOT3 = 55, -+ DX_DOT4 = 56, - DX_CREATE_HANDLE = 57, - DX_CBUFFER_LOAD_LEGACY = 59, - DX_SAMPLE = 60, -@@ -410,6 +413,7 @@ enum dx_intrinsic_opcode - DX_GROUP_ID = 94, - DX_THREAD_ID_IN_GROUP = 95, - DX_FLATTENED_THREAD_ID_IN_GROUP = 96, -+ DX_MAKE_DOUBLE = 101, - DX_SPLIT_DOUBLE = 102, - DX_LEGACY_F32TOF16 = 130, - DX_LEGACY_F16TOF32 = 131, -@@ -2372,6 +2376,12 @@ static void src_param_init_scalar(struct vkd3d_shader_src_param *param, unsigned - param->modifiers = VKD3DSPSM_NONE; - } - -+static void src_param_init_vector(struct vkd3d_shader_src_param *param, unsigned int component_count) -+{ -+ param->swizzle = VKD3D_SHADER_NO_SWIZZLE & ((1ull << VKD3D_SHADER_SWIZZLE_SHIFT(component_count)) - 1); -+ param->modifiers = VKD3DSPSM_NONE; -+} -+ - static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src) - { - src_param_init(param); -@@ -4473,6 +4483,48 @@ static void sm6_parser_emit_dx_discard(struct sm6_parser *sm6, enum dx_intrinsic - src_param_init_from_value(src_param, operands[0]); - } - -+static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_src_param *src_params; -+ struct vkd3d_shader_instruction *ins; -+ struct vkd3d_shader_register regs[2]; -+ enum vkd3d_shader_opcode handler_idx; -+ unsigned int component_count; -+ -+ switch (op) -+ { -+ case DX_DOT2: -+ handler_idx = VKD3DSIH_DP2; 
-+ component_count = 2; -+ break; -+ case DX_DOT3: -+ handler_idx = VKD3DSIH_DP3; -+ component_count = 3; -+ break; -+ case DX_DOT4: -+ handler_idx = VKD3DSIH_DP4; -+ component_count = 4; -+ break; -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (!sm6_parser_emit_composite_construct(sm6, &operands[0], component_count, state, ®s[0])) -+ return; -+ if (!sm6_parser_emit_composite_construct(sm6, &operands[component_count], component_count, state, ®s[1])) -+ return; -+ -+ ins = state->ins; -+ vsir_instruction_init(ins, &sm6->p.location, handler_idx); -+ if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) -+ return; -+ src_param_init_vector_from_reg(&src_params[0], ®s[0]); -+ src_param_init_vector_from_reg(&src_params[1], ®s[1]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -4699,6 +4751,26 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin - instruction_dst_param_init_ssa_scalar(ins, sm6); - } - -+static void sm6_parser_emit_dx_make_double(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_src_param *src_params; -+ struct vkd3d_shader_instruction *ins; -+ struct vkd3d_shader_register reg; -+ -+ if (!sm6_parser_emit_composite_construct(sm6, &operands[0], 2, state, ®)) -+ return; -+ -+ ins = state->ins; -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (!(src_params = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_params[0].reg = reg; -+ src_param_init_vector(&src_params[0], 2); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct 
function_emission_state *state) - { -@@ -5329,6 +5401,9 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_DERIV_FINEX ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DERIV_FINEY ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DISCARD ] = {"v", "1", sm6_parser_emit_dx_discard}, -+ [DX_DOT2 ] = {"g", "RRRR", sm6_parser_emit_dx_dot}, -+ [DX_DOT3 ] = {"g", "RRRRRR", sm6_parser_emit_dx_dot}, -+ [DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot}, - [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, - [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, -@@ -5356,6 +5431,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_LEGACY_F32TOF16 ] = {"i", "f", sm6_parser_emit_dx_unary}, - [DX_LOAD_INPUT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, - [DX_LOG ] = {"g", "R", sm6_parser_emit_dx_unary}, -+ [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, - [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, - [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, - [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 9424a5685a7..466908cd82b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -90,6 +90,7 @@ struct fx_write_context - int status; - - bool child_effect; -+ bool include_empty_buffers; - - const struct fx_write_context_ops *ops; - }; -@@ -191,6 +192,7 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co - list_init(&fx->types); - - fx->child_effect = fx->ops->are_child_effects_supported && ctx->child_effect; -+ fx->include_empty_buffers = version == 4 && ctx->include_empty_buffers; - - hlsl_block_init(&block); - hlsl_prepend_global_uniform_copy(fx->ctx, &block); -@@ -1038,7 +1040,8 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct 
fx_write_context *fx - put_u32(buffer, bind_point); /* Bind point */ - - put_u32(buffer, 0); /* Annotations count */ -- /* FIXME: write annotations */ -+ if (b->annotations) -+ hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); - - count = 0; - size = 0; -@@ -1064,7 +1067,9 @@ static void write_buffers(struct fx_write_context *fx) - - LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) - { -- if (!buffer->size) -+ if (!buffer->size && !fx->include_empty_buffers) -+ continue; -+ if (!strcmp(buffer->name, "$Params")) - continue; - - write_fx_4_buffer(buffer, fx); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index cba954c988f..5638a03a8f5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -2031,7 +2031,8 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, - } - - struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, -- uint32_t modifiers, const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) -+ uint32_t modifiers, const struct hlsl_reg_reservation *reservation, struct hlsl_scope *annotations, -+ const struct vkd3d_shader_location *loc) - { - struct hlsl_buffer *buffer; - -@@ -2042,6 +2043,7 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type - buffer->modifiers = modifiers; - if (reservation) - buffer->reservation = *reservation; -+ buffer->annotations = annotations; - buffer->loc = *loc; - list_add_tail(&ctx->buffers, &buffer->entry); - return buffer; -@@ -3586,10 +3588,10 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil - list_init(&ctx->buffers); - - if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, -- hlsl_strdup(ctx, "$Globals"), 0, NULL, &ctx->location))) -+ hlsl_strdup(ctx, "$Globals"), 0, NULL, NULL, &ctx->location))) - return 
false; - if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, -- hlsl_strdup(ctx, "$Params"), 0, NULL, &ctx->location))) -+ hlsl_strdup(ctx, "$Params"), 0, NULL, NULL, &ctx->location))) - return false; - ctx->cur_buffer = ctx->globals_buffer; - -@@ -3620,6 +3622,10 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil - ctx->warn_implicit_truncation = option->value; - break; - -+ case VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS: -+ ctx->include_empty_buffers = option->value; -+ break; -+ - default: - break; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index c6321f2ead2..aa9cb14fc8d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -375,6 +375,7 @@ struct hlsl_attribute - #define HLSL_STORAGE_NOPERSPECTIVE 0x00008000 - #define HLSL_STORAGE_LINEAR 0x00010000 - #define HLSL_MODIFIER_SINGLE 0x00020000 -+#define HLSL_MODIFIER_EXPORT 0x00040000 - - #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ - HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -@@ -806,6 +807,8 @@ struct hlsl_buffer - * If provided, it should be of type 'b' if type is HLSL_BUFFER_CONSTANT and 't' if type is - * HLSL_BUFFER_TEXTURE. */ - struct hlsl_reg_reservation reservation; -+ /* Scope that contains annotations for this buffer. 
*/ -+ struct hlsl_scope *annotations; - /* Item entry for hlsl_ctx.buffers */ - struct list entry; - -@@ -925,6 +928,7 @@ struct hlsl_ctx - - bool semantic_compat_mapping; - bool child_effect; -+ bool include_empty_buffers; - bool warn_implicit_truncation; - }; - -@@ -1228,7 +1232,8 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp - struct hlsl_ir_node *arg2); - struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); - struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, -- uint32_t modifiers, const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); -+ uint32_t modifiers, const struct hlsl_reg_reservation *reservation, struct hlsl_scope *annotations, -+ const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, - const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 600e2cf2c6a..88b917eff11 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -88,6 +88,7 @@ DomainShader {return KW_DOMAINSHADER; } - do {return KW_DO; } - double {return KW_DOUBLE; } - else {return KW_ELSE; } -+export {return KW_EXPORT; } - extern {return KW_EXTERN; } - false {return KW_FALSE; } - for {return KW_FOR; } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 52c2176542c..e02e0c540f9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -2091,24 +2091,23 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - } - } - --static bool type_has_object_components(struct hlsl_type *type, bool 
must_be_in_struct) -+static bool type_has_object_components(const struct hlsl_type *type) - { -- if (type->class == HLSL_CLASS_OBJECT) -- return !must_be_in_struct; - if (type->class == HLSL_CLASS_ARRAY) -- return type_has_object_components(type->e.array.type, must_be_in_struct); -+ return type_has_object_components(type->e.array.type); - - if (type->class == HLSL_CLASS_STRUCT) - { -- unsigned int i; -- -- for (i = 0; i < type->e.record.field_count; ++i) -+ for (unsigned int i = 0; i < type->e.record.field_count; ++i) - { -- if (type_has_object_components(type->e.record.fields[i].type, false)) -+ if (type_has_object_components(type->e.record.fields[i].type)) - return true; - } -+ -+ return false; - } -- return false; -+ -+ return !hlsl_is_numeric_type(type); - } - - static bool type_has_numeric_components(struct hlsl_type *type) -@@ -2146,6 +2145,18 @@ static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int mo - } - } - -+static void check_invalid_object_fields(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) -+{ -+ const struct hlsl_type *type = var->data_type; -+ -+ while (type->class == HLSL_CLASS_ARRAY) -+ type = type->e.array.type; -+ -+ if (type->class == HLSL_CLASS_STRUCT && type_has_object_components(type)) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Target profile doesn't support objects as struct members in uniform variables."); -+} -+ - static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - { - struct hlsl_type *basic_type = v->basic_type; -@@ -2271,12 +2282,8 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - if (!(modifiers & HLSL_STORAGE_STATIC)) - var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - -- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && -- type_has_object_components(var->data_type, true)) -- { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Target profile doesn't support 
objects as struct members in uniform variables."); -- } -+ if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ check_invalid_object_fields(ctx, var); - - if ((func = hlsl_get_first_func_decl(ctx, var->name))) - { -@@ -2312,7 +2319,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - } - - if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) -- && type_has_object_components(var->data_type, false)) -+ && type_has_object_components(var->data_type)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Static variables cannot have both numeric and resource components."); -@@ -2400,7 +2407,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - - /* Initialize statics to zero by default. */ - -- if (type_has_object_components(var->data_type, false)) -+ if (type_has_object_components(var->data_type)) - { - free_parse_variable_def(v); - continue; -@@ -4358,22 +4365,7 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type - return NULL; - - for (i = 0; i < params->args_count; ++i) -- { -- struct hlsl_ir_node *arg = params->args[i]; -- -- if (arg->data_type->class == HLSL_CLASS_OBJECT) -- { -- struct vkd3d_string_buffer *string; -- -- if ((string = hlsl_type_to_string(ctx, arg->data_type))) -- hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid type %s for constructor argument.", string->buffer); -- hlsl_release_string_buffer(ctx, string); -- continue; -- } -- -- initialize_var_components(ctx, params->instrs, var, &idx, arg); -- } -+ initialize_var_components(ctx, params->instrs, var, &idx, params->args[i]); - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -@@ -5349,6 +5341,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_DOMAINSHADER - %token KW_DOUBLE - %token KW_ELSE -+%token KW_EXPORT - 
%token KW_EXTERN - %token KW_FALSE - %token KW_FOR -@@ -5673,12 +5666,12 @@ effect_group: - } - - buffer_declaration: -- var_modifiers buffer_type any_identifier colon_attribute -+ var_modifiers buffer_type any_identifier colon_attribute annotations_opt - { - if ($4.semantic.name) - hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); - -- if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $2, $3, $1, &$4.reg_reservation, &@3))) -+ if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $2, $3, $1, &$4.reg_reservation, $5, &@3))) - YYABORT; - } - -@@ -5977,9 +5970,9 @@ func_prototype_no_attrs: - /* Functions are unconditionally inlined. */ - modifiers &= ~HLSL_MODIFIER_INLINE; - -- if (modifiers & ~HLSL_MODIFIERS_MAJORITY_MASK) -+ if (modifiers & ~(HLSL_MODIFIERS_MAJORITY_MASK | HLSL_MODIFIER_EXPORT)) - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Only majority modifiers are allowed on functions."); -+ "Unexpected modifier used on a function."); - if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) - YYABORT; - if ((var = hlsl_get_var(ctx->globals, $3))) -@@ -6876,6 +6869,10 @@ var_modifiers: - { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); - } -+ | KW_EXPORT var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_EXPORT, &@1); -+ } - | var_identifier var_modifiers - { - if (!strcmp($1, "precise")) -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 5c09ce04f5b..6f2de93767b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -427,7 +427,10 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * - { - field = &type->e.record.fields[i]; - if (hlsl_type_is_resource(field->type)) -+ { -+ hlsl_fixme(ctx, &field->loc, "Prepend uniform copies for resource components within structs."); - continue; -+ } - validate_field_semantic(ctx, 
field); - semantic = &field->semantic; - elem_semantic_index = semantic->index; -@@ -5237,25 +5240,6 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a - } - } - --static bool type_has_object_components(struct hlsl_type *type) --{ -- if (type->class == HLSL_CLASS_OBJECT) -- return true; -- if (type->class == HLSL_CLASS_ARRAY) -- return type_has_object_components(type->e.array.type); -- if (type->class == HLSL_CLASS_STRUCT) -- { -- unsigned int i; -- -- for (i = 0; i < type->e.record.field_count; ++i) -- { -- if (type_has_object_components(type->e.record.fields[i].type)) -- return true; -- } -- } -- return false; --} -- - static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) - { - struct hlsl_ir_node *instr, *next; -@@ -5363,9 +5347,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - } - else - { -- if (type_has_object_components(var->data_type)) -- hlsl_fixme(ctx, &var->loc, "Prepend uniform copies for object components within structs."); -- - if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT - && !var->semantic.name) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 4f0226187af..7230d0e8b61 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -3627,32 +3627,6 @@ struct vsir_cfg_node_sorter - struct vsir_block_list available_blocks; - }; - --static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cfg_node_sorter *sorter, struct vsir_block *block) --{ -- struct vsir_block_list *loop = NULL; -- struct vsir_cfg_node_sorter_stack_item *item; -- enum vkd3d_result ret; -- -- if (sorter->cfg->loops_by_header[block->label - 1] != SIZE_MAX) -- loop = &sorter->cfg->loops[sorter->cfg->loops_by_header[block->label - 1]]; -- -- if ((ret = vsir_block_list_add_checked(&sorter->available_blocks, block)) < 0) -- return ret; -- -- if (!loop) -- return 
VKD3D_OK; -- -- if (!vkd3d_array_reserve((void **)&sorter->stack, &sorter->stack_capacity, sorter->stack_count + 1, sizeof(*sorter->stack))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -- -- item = &sorter->stack[sorter->stack_count++]; -- item->loop = loop; -- item->seen_count = 0; -- item->begin = sorter->cfg->order.count; -- -- return VKD3D_OK; --} -- - /* Topologically sort the blocks according to the forward edges. By - * definition if the input CFG is reducible then its forward edges - * form a DAG, so a topological sorting exists. In order to compute it -@@ -3727,7 +3701,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - - vsir_block_list_init(&sorter.available_blocks); - -- if ((ret = vsir_cfg_node_sorter_make_node_available(&sorter, cfg->entry)) < 0) -+ if ((ret = vsir_block_list_add_checked(&sorter.available_blocks, cfg->entry)) < 0) - goto fail; - - while (sorter.available_blocks.count != 0) -@@ -3754,6 +3728,24 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - break; - } - -+ /* If the node is a loop header, open the loop. 
*/ -+ if (sorter.cfg->loops_by_header[block->label - 1] != SIZE_MAX) -+ { -+ struct vsir_block_list *loop = &sorter.cfg->loops[sorter.cfg->loops_by_header[block->label - 1]]; -+ -+ if (loop) -+ { -+ if (!vkd3d_array_reserve((void **)&sorter.stack, &sorter.stack_capacity, -+ sorter.stack_count + 1, sizeof(*sorter.stack))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ inner_stack_item = &sorter.stack[sorter.stack_count++]; -+ inner_stack_item->loop = loop; -+ inner_stack_item->seen_count = 0; -+ inner_stack_item->begin = sorter.cfg->order.count; -+ } -+ } -+ - vsir_block_list_remove_index(&sorter.available_blocks, i); - block->order_pos = cfg->order.count; - if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) -@@ -3795,7 +3787,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - - if (in_degrees[successor->label - 1] == 0) - { -- if ((ret = vsir_cfg_node_sorter_make_node_available(&sorter, successor)) < 0) -+ if ((ret = vsir_block_list_add_checked(&sorter.available_blocks, successor)) < 0) - goto fail; - } - } -@@ -4393,106 +4385,6 @@ fail: - return ret; - } - --enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) --{ -- enum vkd3d_result result = VKD3D_OK; -- -- remove_dcl_temps(program); -- -- if ((result = vsir_program_lower_texkills(program)) < 0) -- return result; -- -- if (program->shader_version.major >= 6) -- { -- struct vsir_cfg cfg; -- -- if ((result = lower_switch_to_if_ladder(program)) < 0) -- return result; -- -- if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0) -- return result; -- -- if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) -- return result; -- -- vsir_cfg_compute_dominators(&cfg); -- -- if ((result = vsir_cfg_compute_loops(&cfg)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -- return result; -- } -- -- if ((result = 
vsir_cfg_sort_nodes(&cfg)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -- return result; -- } -- -- if ((result = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -- return result; -- } -- -- if ((result = vsir_cfg_build_structured_program(&cfg)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -- return result; -- } -- -- if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -- return result; -- } -- -- vsir_cfg_cleanup(&cfg); -- } -- else -- { -- if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -- { -- if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) -- return result; -- } -- -- if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) -- { -- if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) -- return result; -- -- if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, -- &program->input_signature)) < 0) -- return result; -- } -- -- if ((result = vsir_program_normalise_io_registers(program)) < 0) -- return result; -- -- if ((result = instruction_array_normalise_flat_constants(program)) < 0) -- return result; -- -- remove_dead_code(program); -- -- if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) -- return result; -- } -- -- if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) -- return result; -- -- if (TRACE_ON()) -- vkd3d_shader_trace(program); -- -- if ((result = vsir_program_validate(program, config_flags, -- compile_info->source_name, message_context)) < 0) -- return result; -- -- return result; --} -- - struct validation_context - { - struct vkd3d_shader_message_context *message_context; -@@ -5357,3 +5249,103 @@ fail: - - return VKD3D_ERROR_OUT_OF_MEMORY; - } -+ -+enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, -+ const struct 
vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) -+{ -+ enum vkd3d_result result = VKD3D_OK; -+ -+ remove_dcl_temps(program); -+ -+ if ((result = vsir_program_lower_texkills(program)) < 0) -+ return result; -+ -+ if (program->shader_version.major >= 6) -+ { -+ struct vsir_cfg cfg; -+ -+ if ((result = lower_switch_to_if_ladder(program)) < 0) -+ return result; -+ -+ if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0) -+ return result; -+ -+ if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) -+ return result; -+ -+ vsir_cfg_compute_dominators(&cfg); -+ -+ if ((result = vsir_cfg_compute_loops(&cfg)) < 0) -+ { -+ vsir_cfg_cleanup(&cfg); -+ return result; -+ } -+ -+ if ((result = vsir_cfg_sort_nodes(&cfg)) < 0) -+ { -+ vsir_cfg_cleanup(&cfg); -+ return result; -+ } -+ -+ if ((result = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) -+ { -+ vsir_cfg_cleanup(&cfg); -+ return result; -+ } -+ -+ if ((result = vsir_cfg_build_structured_program(&cfg)) < 0) -+ { -+ vsir_cfg_cleanup(&cfg); -+ return result; -+ } -+ -+ if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) -+ { -+ vsir_cfg_cleanup(&cfg); -+ return result; -+ } -+ -+ vsir_cfg_cleanup(&cfg); -+ } -+ else -+ { -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ { -+ if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) -+ return result; -+ } -+ -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) -+ { -+ if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) -+ return result; -+ -+ if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, -+ &program->input_signature)) < 0) -+ return result; -+ } -+ -+ if ((result = vsir_program_normalise_io_registers(program)) < 0) -+ return result; -+ -+ if ((result = instruction_array_normalise_flat_constants(program)) < 0) -+ return result; -+ -+ 
remove_dead_code(program); -+ -+ if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) -+ return result; -+ } -+ -+ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) -+ return result; -+ -+ if (TRACE_ON()) -+ vkd3d_shader_trace(program); -+ -+ if ((result = vsir_program_validate(program, config_flags, -+ compile_info->source_name, message_context)) < 0) -+ return result; -+ -+ return result; -+} --- -2.43.0 - diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-9c0d04c86204fd360a7528faf2b53acc730.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-9c0d04c86204fd360a7528faf2b53acc730.patch deleted file mode 100644 index 105211a2..00000000 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-9c0d04c86204fd360a7528faf2b53acc730.patch +++ /dev/null @@ -1,1616 +0,0 @@ -From 42ab22402227cf671a3542fee0a8fc2cd77ba3cc Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 5 Apr 2024 08:20:45 +1100 -Subject: [PATCH] Updated vkd3d to 9c0d04c86204fd360a7528faf2b53acc7301b598. - ---- - libs/vkd3d/libs/vkd3d-shader/ir.c | 216 +++++++++- - libs/vkd3d/libs/vkd3d/device.c | 543 ++++++++++++++++++++------ - libs/vkd3d/libs/vkd3d/resource.c | 2 +- - libs/vkd3d/libs/vkd3d/vkd3d_main.c | 4 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 12 +- - 5 files changed, 635 insertions(+), 142 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 7230d0e8b61..8af537390f9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -3050,6 +3050,8 @@ struct vsir_cfg_structure - STRUCTURE_TYPE_BLOCK, - /* Execute a loop, which is identified by an index. */ - STRUCTURE_TYPE_LOOP, -+ /* Execute a selection construct. */ -+ STRUCTURE_TYPE_SELECTION, - /* Execute a `return' or a (possibly) multilevel `break' or - * `continue', targeting a loop by its index. 
If `condition' - * is non-NULL, then the jump is conditional (this is -@@ -3065,6 +3067,13 @@ struct vsir_cfg_structure - unsigned idx; - } loop; - struct -+ { -+ struct vkd3d_shader_src_param *condition; -+ struct vsir_cfg_structure_list if_body; -+ struct vsir_cfg_structure_list else_body; -+ bool invert_condition; -+ } selection; -+ struct - { - enum vsir_cfg_jump_type - { -@@ -3110,6 +3119,20 @@ static struct vsir_cfg_structure *vsir_cfg_structure_list_append(struct vsir_cfg - return ret; - } - -+static enum vkd3d_result vsir_cfg_structure_list_append_from_region(struct vsir_cfg_structure_list *list, -+ struct vsir_cfg_structure *begin, size_t size) -+{ -+ if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + size, -+ sizeof(*list->structures))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ memcpy(&list->structures[list->count], begin, size * sizeof(*begin)); -+ -+ list->count += size; -+ -+ return VKD3D_OK; -+} -+ - static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type) - { - memset(structure, 0, sizeof(*structure)); -@@ -3118,8 +3141,20 @@ static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum v - - static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) - { -- if (structure->type == STRUCTURE_TYPE_LOOP) -- vsir_cfg_structure_list_cleanup(&structure->u.loop.body); -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_LOOP: -+ vsir_cfg_structure_list_cleanup(&structure->u.loop.body); -+ break; -+ -+ case STRUCTURE_TYPE_SELECTION: -+ vsir_cfg_structure_list_cleanup(&structure->u.selection.if_body); -+ vsir_cfg_structure_list_cleanup(&structure->u.selection.else_body); -+ break; -+ -+ default: -+ break; -+ } - } - - struct vsir_cfg -@@ -3295,6 +3330,25 @@ static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_struct - TRACE("%s} # %u\n", cfg->debug_buffer.buffer, structure->u.loop.idx); - break; - -+ case 
STRUCTURE_TYPE_SELECTION: -+ TRACE("%sif {\n", cfg->debug_buffer.buffer); -+ -+ vsir_cfg_structure_list_dump(cfg, &structure->u.selection.if_body); -+ -+ if (structure->u.selection.else_body.count == 0) -+ { -+ TRACE("%s}\n", cfg->debug_buffer.buffer); -+ } -+ else -+ { -+ TRACE("%s} else {\n", cfg->debug_buffer.buffer); -+ -+ vsir_cfg_structure_list_dump(cfg, &structure->u.selection.else_body); -+ -+ TRACE("%s}\n", cfg->debug_buffer.buffer); -+ } -+ break; -+ - case STRUCTURE_TYPE_JUMP: - { - const char *type_str; -@@ -4115,11 +4169,29 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - * next block, in which case we make sure it's the - * false branch. */ - if (action_true.jump_type == JUMP_NONE) -+ { -+ invert_condition = true; -+ } -+ else if (stack_depth >= 2) -+ { -+ struct vsir_cfg_structure_list *inner_loop_frame = stack[stack_depth - 2]; -+ struct vsir_cfg_structure *inner_loop = &inner_loop_frame->structures[inner_loop_frame->count - 1]; -+ -+ assert(inner_loop->type == STRUCTURE_TYPE_LOOP); -+ -+ /* Otherwise, if one of the branches is -+ * continueing the inner loop we're inside, -+ * make sure it's the false branch (because it -+ * will be optimized out later). 
*/ -+ if (action_true.jump_type == JUMP_CONTINUE && action_true.target == inner_loop->u.loop.idx) -+ invert_condition = true; -+ } -+ -+ if (invert_condition) - { - struct vsir_cfg_edge_action tmp = action_true; - action_true = action_false; - action_false = tmp; -- invert_condition = true; - } - - assert(action_true.jump_type != JUMP_NONE); -@@ -4178,6 +4250,103 @@ fail: - return VKD3D_ERROR_OUT_OF_MEMORY; - } - -+static void vsir_cfg_remove_trailing_continue(struct vsir_cfg_structure_list *list, unsigned int target) -+{ -+ struct vsir_cfg_structure *last = &list->structures[list->count - 1]; -+ -+ if (last->type == STRUCTURE_TYPE_JUMP && last->u.jump.type == JUMP_CONTINUE -+ && !last->u.jump.condition && last->u.jump.target == target) -+ --list->count; -+} -+ -+static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg_structure_list *list) -+{ -+ enum vkd3d_result ret; -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; -+ -+ if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) -+ continue; -+ -+ vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); -+ new_selection.u.selection.condition = structure->u.jump.condition; -+ new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; -+ -+ if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, -+ STRUCTURE_TYPE_JUMP))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ new_jump->u.jump.type = structure->u.jump.type; -+ new_jump->u.jump.target = structure->u.jump.target; -+ -+ /* Move the rest of the structure list in the else branch -+ * rather than leaving it after the selection construct. 
The -+ * reason is that this is more conducive to further -+ * optimization, because all the conditional `break's appear -+ * as the last instruction of a branch of a cascade of -+ * selection constructs at the end of the structure list we're -+ * processing, instead of being buried in the middle of the -+ * structure list itself. */ -+ if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, -+ &list->structures[i + 1], list->count - i - 1)) < 0) -+ return ret; -+ -+ *structure = new_selection; -+ list->count = i + 1; -+ -+ if ((ret = vsir_cfg_synthesize_selections(&structure->u.selection.else_body)) < 0) -+ return ret; -+ -+ break; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) -+{ -+ enum vkd3d_result ret; -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *loop = &list->structures[i]; -+ struct vsir_cfg_structure_list *loop_body; -+ -+ if (loop->type != STRUCTURE_TYPE_LOOP) -+ continue; -+ -+ loop_body = &loop->u.loop.body; -+ -+ if (loop_body->count == 0) -+ continue; -+ -+ vsir_cfg_remove_trailing_continue(loop_body, loop->u.loop.idx); -+ -+ if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0) -+ return ret; -+ -+ if ((ret = vsir_cfg_synthesize_selections(loop_body)) < 0) -+ return ret; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) -+{ -+ enum vkd3d_result ret; -+ -+ ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program); -+ -+ if (TRACE_ON()) -+ vsir_cfg_dump_structured_program(cfg); -+ -+ return ret; -+} -+ - static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, - struct vsir_cfg_structure_list *list, unsigned int loop_idx) - { -@@ -4274,6 +4443,41 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, - break; - } - -+ case STRUCTURE_TYPE_SELECTION: -+ if 
(!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], &no_loc, -+ VKD3DSIH_IF, 0, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ cfg->instructions[cfg->ins_count].src[0] = *structure->u.selection.condition; -+ -+ if (structure->u.selection.invert_condition) -+ cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; -+ -+ ++cfg->ins_count; -+ -+ if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.if_body, loop_idx)) < 0) -+ return ret; -+ -+ if (structure->u.selection.else_body.count != 0) -+ { -+ if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ELSE); -+ -+ if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.else_body, loop_idx)) < 0) -+ return ret; -+ } -+ -+ if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDIF); -+ break; -+ - case STRUCTURE_TYPE_JUMP: - { - /* Encode the jump target as the loop index plus a bit to remember whether -@@ -5299,6 +5503,12 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t - return result; - } - -+ if ((result = vsir_cfg_optimize(&cfg)) < 0) -+ { -+ vsir_cfg_cleanup(&cfg); -+ return result; -+ } -+ - if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) - { - vsir_cfg_cleanup(&cfg); -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 7841a811bf7..65db8b70bfd 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -2523,18 +2523,224 @@ static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache 
*cach - } - } - -+/* ID3D12ShaderCacheSession */ -+struct d3d12_cache_session -+{ -+ ID3D12ShaderCacheSession ID3D12ShaderCacheSession_iface; -+ unsigned int refcount; -+ -+ struct d3d12_device *device; -+ struct vkd3d_private_store private_store; -+ D3D12_SHADER_CACHE_SESSION_DESC desc; -+}; -+ -+static inline struct d3d12_cache_session *impl_from_ID3D12ShaderCacheSession(ID3D12ShaderCacheSession *iface) -+{ -+ return CONTAINING_RECORD(iface, struct d3d12_cache_session, ID3D12ShaderCacheSession_iface); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_QueryInterface(ID3D12ShaderCacheSession *iface, -+ REFIID iid, void **object) -+{ -+ TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); -+ -+ if (!object) -+ { -+ WARN("Output pointer is NULL, returning E_POINTER.\n"); -+ return E_POINTER; -+ } -+ -+ if (IsEqualGUID(iid, &IID_ID3D12ShaderCacheSession) -+ || IsEqualGUID(iid, &IID_ID3D12DeviceChild) -+ || IsEqualGUID(iid, &IID_ID3D12Object) -+ || IsEqualGUID(iid, &IID_IUnknown)) -+ { -+ ID3D12ShaderCacheSession_AddRef(iface); -+ *object = iface; -+ return S_OK; -+ } -+ -+ WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); -+ -+ *object = NULL; -+ return E_NOINTERFACE; -+} -+ -+static ULONG STDMETHODCALLTYPE d3d12_cache_session_AddRef(ID3D12ShaderCacheSession *iface) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ unsigned int refcount = vkd3d_atomic_increment_u32(&session->refcount); -+ -+ TRACE("%p increasing refcount to %u.\n", session, refcount); -+ -+ return refcount; -+} -+ -+static void d3d12_cache_session_destroy(struct d3d12_cache_session *session) -+{ -+ struct d3d12_device *device = session->device; -+ -+ TRACE("Destroying cache session %p.\n", session); -+ -+ vkd3d_private_store_destroy(&session->private_store); -+ vkd3d_free(session); -+ -+ d3d12_device_release(device); -+} -+ -+static ULONG STDMETHODCALLTYPE 
d3d12_cache_session_Release(ID3D12ShaderCacheSession *iface) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ unsigned int refcount = vkd3d_atomic_decrement_u32(&session->refcount); -+ -+ TRACE("%p decreasing refcount to %u.\n", session, refcount); -+ -+ if (!refcount) -+ d3d12_cache_session_destroy(session); -+ -+ return refcount; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_GetPrivateData(ID3D12ShaderCacheSession *iface, -+ REFGUID guid, UINT *data_size, void *data) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); -+ -+ return vkd3d_get_private_data(&session->private_store, guid, data_size, data); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetPrivateData(ID3D12ShaderCacheSession *iface, -+ REFGUID guid, UINT data_size, const void *data) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); -+ -+ return vkd3d_set_private_data(&session->private_store, guid, data_size, data); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetPrivateDataInterface( -+ ID3D12ShaderCacheSession *iface, REFGUID guid, const IUnknown *data) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); -+ -+ return vkd3d_set_private_data_interface(&session->private_store, guid, data); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetName(ID3D12ShaderCacheSession *iface, -+ const WCHAR *name) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p, name %s.\n", iface, debugstr_w(name, session->device->wchar_size)); -+ -+ return name ? 
S_OK : E_INVALIDARG; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_GetDevice(ID3D12ShaderCacheSession *iface, -+ REFIID iid, void **device) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); -+ -+ return d3d12_device_query_interface(session->device, iid, device); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_FindValue(ID3D12ShaderCacheSession *iface, -+ const void *key, UINT key_size, void *value, UINT *value_size) -+{ -+ FIXME("iface %p, key %p, key_size %#x, value %p, value_size %p stub!\n", -+ iface, key, key_size, value, value_size); -+ -+ return DXGI_ERROR_NOT_FOUND; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_StoreValue(ID3D12ShaderCacheSession *iface, -+ const void *key, UINT key_size, const void *value, UINT value_size) -+{ -+ FIXME("iface %p, key %p, key_size %#x, value %p, value_size %u stub!\n", iface, key, key_size, -+ value, value_size); -+ -+ return E_NOTIMPL; -+} -+ -+static void STDMETHODCALLTYPE d3d12_cache_session_SetDeleteOnDestroy(ID3D12ShaderCacheSession *iface) -+{ -+ FIXME("iface %p stub!\n", iface); -+} -+ -+static D3D12_SHADER_CACHE_SESSION_DESC * STDMETHODCALLTYPE d3d12_cache_session_GetDesc( -+ ID3D12ShaderCacheSession *iface, D3D12_SHADER_CACHE_SESSION_DESC *desc) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p.\n", iface); -+ *desc = session->desc; -+ return desc; -+} -+ -+static const struct ID3D12ShaderCacheSessionVtbl d3d12_cache_session_vtbl = -+{ -+ /* IUnknown methods */ -+ d3d12_cache_session_QueryInterface, -+ d3d12_cache_session_AddRef, -+ d3d12_cache_session_Release, -+ /* ID3D12Object methods */ -+ d3d12_cache_session_GetPrivateData, -+ d3d12_cache_session_SetPrivateData, -+ d3d12_cache_session_SetPrivateDataInterface, -+ d3d12_cache_session_SetName, -+ /* ID3D12DeviceChild methods */ -+ 
d3d12_cache_session_GetDevice, -+ /* ID3D12ShaderCacheSession methods */ -+ d3d12_cache_session_FindValue, -+ d3d12_cache_session_StoreValue, -+ d3d12_cache_session_SetDeleteOnDestroy, -+ d3d12_cache_session_GetDesc, -+}; -+ -+static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session, -+ struct d3d12_device *device, const D3D12_SHADER_CACHE_SESSION_DESC *desc) -+{ -+ HRESULT hr; -+ -+ session->ID3D12ShaderCacheSession_iface.lpVtbl = &d3d12_cache_session_vtbl; -+ session->refcount = 1; -+ session->desc = *desc; -+ -+ if (!session->desc.MaximumValueFileSizeBytes) -+ session->desc.MaximumValueFileSizeBytes = 128 * 1024 * 1024; -+ if (!session->desc.MaximumInMemoryCacheSizeBytes) -+ session->desc.MaximumInMemoryCacheSizeBytes = 1024 * 1024; -+ if (!session->desc.MaximumInMemoryCacheEntries) -+ session->desc.MaximumInMemoryCacheEntries = 128; -+ -+ if (FAILED(hr = vkd3d_private_store_init(&session->private_store))) -+ return hr; -+ -+ d3d12_device_add_ref(session->device = device); -+ -+ return S_OK; -+} -+ - /* ID3D12Device */ --static inline struct d3d12_device *impl_from_ID3D12Device8(ID3D12Device8 *iface) -+static inline struct d3d12_device *impl_from_ID3D12Device9(ID3D12Device9 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device8_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device9_iface); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device9 *iface, - REFIID riid, void **object) - { - TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); - -- if (IsEqualGUID(riid, &IID_ID3D12Device8) -+ if (IsEqualGUID(riid, &IID_ID3D12Device9) -+ || IsEqualGUID(riid, &IID_ID3D12Device8) - || IsEqualGUID(riid, &IID_ID3D12Device7) - || IsEqualGUID(riid, &IID_ID3D12Device6) - || IsEqualGUID(riid, &IID_ID3D12Device5) -@@ -2557,9 +2763,9 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_QueryInterface(ID3D12Device8 *ifac - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device8 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device9 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - unsigned int refcount = vkd3d_atomic_increment_u32(&device->refcount); - - TRACE("%p increasing refcount to %u.\n", device, refcount); -@@ -2589,9 +2795,9 @@ static HRESULT device_worker_stop(struct d3d12_device *device) - return S_OK; - } - --static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device8 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device9 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - unsigned int refcount = vkd3d_atomic_decrement_u32(&device->refcount); - - TRACE("%p decreasing refcount to %u.\n", device, refcount); -@@ -2628,10 +2834,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device8 *iface) - return refcount; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device9 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -2639,10 +2845,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device8 *ifac - return vkd3d_get_private_data(&device->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device9 *iface, - REFGUID guid, UINT 
data_size, const void *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -2650,19 +2856,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device8 *ifac - return vkd3d_set_private_data(&device->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device9 *iface, - REFGUID guid, const IUnknown *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&device->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device8 *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device9 *iface, const WCHAR *name) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size)); - -@@ -2670,17 +2876,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device8 *iface, cons - VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); - } - --static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device8 *iface) -+static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device9 *iface) - { - TRACE("iface %p.\n", iface); - - return 1; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device9 *iface, - const D3D12_COMMAND_QUEUE_DESC *desc, REFIID 
riid, void **command_queue) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_command_queue *object; - HRESULT hr; - -@@ -2694,10 +2900,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device8 * - riid, command_queue); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device9 *iface, - D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_command_allocator *object; - HRESULT hr; - -@@ -2711,10 +2917,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic - riid, command_allocator); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device9 *iface, - const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_pipeline_state *object; - HRESULT hr; - -@@ -2728,10 +2934,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 - &IID_ID3D12PipelineState, riid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device9 *iface, - const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct 
d3d12_pipeline_state *object; - HRESULT hr; - -@@ -2745,11 +2951,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D - &IID_ID3D12PipelineState, riid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device9 *iface, - UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, - ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_command_list *object; - HRESULT hr; - -@@ -2872,10 +3078,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) - return true; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 *iface, - D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", - iface, feature, feature_data, feature_data_size); -@@ -3547,10 +3753,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device8 - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device9 *iface, - const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_descriptor_heap *object; - HRESULT hr; - -@@ -3564,7 +3770,7 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateDescriptorHeap(ID3D12Device8 - &IID_ID3D12DescriptorHeap, riid, descriptor_heap); - } - --static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device8 *iface, -+static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device9 *iface, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) - { - TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); -@@ -3587,11 +3793,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device9 *iface, - UINT node_mask, const void *bytecode, SIZE_T bytecode_length, - REFIID riid, void **root_signature) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_root_signature *object; - HRESULT hr; - -@@ -3607,10 +3813,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device8 - &IID_ID3D12RootSignature, riid, root_signature); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device9 *iface, - const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); -@@ -3619,11 +3825,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device8 
*iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device9 *iface, - ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, resource %p, desc %p, descriptor %s.\n", -@@ -3633,11 +3839,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device9 *iface, - ID3D12Resource *resource, ID3D12Resource *counter_resource, - const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %s.\n", -@@ -3648,7 +3854,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device9 *iface, - ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -@@ -3656,10 +3862,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device8 - iface, resource, desc, debug_cpu_handle(descriptor)); - - d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), -- impl_from_ID3D12Device8(iface), 
unsafe_impl_from_ID3D12Resource(resource), desc); -+ impl_from_ID3D12Device9(iface), unsafe_impl_from_ID3D12Resource(resource), desc); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device9 *iface, - ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -@@ -3667,13 +3873,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device8 - iface, resource, desc, debug_cpu_handle(descriptor)); - - d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), -- impl_from_ID3D12Device8(iface), unsafe_impl_from_ID3D12Resource(resource), desc); -+ impl_from_ID3D12Device9(iface), unsafe_impl_from_ID3D12Resource(resource), desc); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device9 *iface, - const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); -@@ -3682,14 +3888,14 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device8 *iface, - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device9 *iface, - UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, - const UINT *dst_descriptor_range_sizes, - UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, - const UINT *src_descriptor_range_sizes, - 
D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; - unsigned int dst_range_size, src_range_size; - struct d3d12_descriptor_heap *dst_heap; -@@ -3745,7 +3951,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device8 *iface, - } - } - --static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device9 *iface, - UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, - const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) -@@ -3876,10 +4082,10 @@ static void d3d12_device_get_resource_allocation_info(struct d3d12_device *devic - } - - static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( -- ID3D12Device8 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, -+ ID3D12Device9 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, - UINT count, const D3D12_RESOURCE_DESC *resource_descs) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p.\n", - iface, info, visible_mask, count, resource_descs); -@@ -3891,10 +4097,10 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour - return info; - } - --static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device8 *iface, -+static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device9 *iface, - D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) - { -- struct d3d12_device 
*device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - bool coherent; - - TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", -@@ -3934,12 +4140,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope - return heap_properties; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device9 *iface, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_resource *object; - HRESULT hr; -@@ -3961,10 +4167,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device9 *iface, - const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_heap *object; - HRESULT hr; - -@@ -3980,12 +4186,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device8 *iface, - return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device9 *iface, - ID3D12Heap *heap, UINT64 heap_offset, - 
const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_heap *heap_object; - struct d3d12_resource *object; -@@ -4006,11 +4212,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device8 - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device9 *iface, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_resource *object; - HRESULT hr; -@@ -4027,11 +4233,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device9 *iface, - ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, - const WCHAR *name, HANDLE *handle) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", - iface, object, attributes, (uint32_t)access, debugstr_w(name, device->wchar_size), handle); -@@ -4039,7 +4245,7 @@ 
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device8 * - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device9 *iface, - HANDLE handle, REFIID riid, void **object) - { - FIXME("iface %p, handle %p, riid %s, object %p stub!\n", -@@ -4048,10 +4254,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device8 *if - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device9 *iface, - const WCHAR *name, DWORD access, HANDLE *handle) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - FIXME("iface %p, name %s, access %#x, handle %p stub!\n", - iface, debugstr_w(name, device->wchar_size), (uint32_t)access, handle); -@@ -4059,7 +4265,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device9 *iface, - UINT object_count, ID3D12Pageable * const *objects) - { - ID3D12Fence *fence; -@@ -4067,17 +4273,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device8 *iface, - - TRACE("iface %p, object_count %u, objects %p.\n", iface, object_count, objects); - -- if (FAILED(hr = ID3D12Device8_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) -+ if (FAILED(hr = ID3D12Device9_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) - return hr; - -- hr = ID3D12Device8_EnqueueMakeResident(iface, 0, object_count, objects, fence, 1); -+ hr = ID3D12Device9_EnqueueMakeResident(iface, 0, object_count, objects, fence, 1); - if (SUCCEEDED(hr)) - 
ID3D12Fence_SetEventOnCompletion(fence, 1, NULL); - ID3D12Fence_Release(fence); - return hr; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device9 *iface, - UINT object_count, ID3D12Pageable * const *objects) - { - FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", -@@ -4086,10 +4292,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device8 *iface, - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device9 *iface, - UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_fence *object; - HRESULT hr; - -@@ -4102,9 +4308,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device8 *iface, - return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device8 *iface) -+static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device9 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p.\n", iface); - -@@ -4189,12 +4395,12 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, - *total_bytes = total; - } - --static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device9 *iface, - const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, - UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, - UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) - { -- struct 
d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - D3D12_RESOURCE_DESC1 resource_desc; - - TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " -@@ -4208,10 +4414,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device8 * - base_offset, layouts, row_counts, row_sizes, total_bytes); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device9 *iface, - const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_query_heap *object; - HRESULT hr; - -@@ -4224,18 +4430,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device8 *ifa - return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device8 *iface, BOOL enable) -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device9 *iface, BOOL enable) - { - FIXME("iface %p, enable %#x stub!\n", iface, enable); - - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device9 *iface, - const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, - REFIID iid, void **command_signature) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_command_signature *object; - HRESULT hr; - -@@ -4249,14 +4455,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic - &IID_ID3D12CommandSignature, iid, 
command_signature); - } - --static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device9 *iface, - ID3D12Resource *resource, UINT *total_tile_count, - D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, - UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, - D3D12_SUBRESOURCE_TILING *sub_resource_tilings) - { - const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " - "standard_title_shape %p, sub_resource_tiling_count %p, " -@@ -4269,9 +4475,9 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device8 *ifac - sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); - } - --static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device8 *iface, LUID *luid) -+static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device9 *iface, LUID *luid) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, luid %p.\n", iface, luid); - -@@ -4280,7 +4486,7 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device8 *iface - return luid; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device9 *iface, - const void *blob, SIZE_T blob_size, REFIID iid, void **lib) - { - FIXME("iface %p, blob %p, blob_size %"PRIuPTR", iid %s, lib %p stub!\n", -@@ -4289,7 +4495,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device - return DXGI_ERROR_UNSUPPORTED; - } - --static HRESULT STDMETHODCALLTYPE 
d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device9 *iface, - ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, - D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) - { -@@ -4299,7 +4505,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion( - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device9 *iface, - UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) - { - FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); -@@ -4307,10 +4513,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device8 - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device9 *iface, - const D3D12_PIPELINE_STATE_STREAM_DESC *desc, REFIID iid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_pipeline_state *object; - HRESULT hr; - -@@ -4322,7 +4528,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device8 - return return_interface(&object->ID3D12PipelineState_iface, &IID_ID3D12PipelineState, iid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12Device9 *iface, - const void *address, REFIID iid, void **heap) - { - FIXME("iface %p, address %p, iid %s, heap %p stub!\n", iface, address, debugstr_guid(iid), heap); -@@ -4330,7 
+4536,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12 - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device9 *iface, - HANDLE file_mapping, REFIID iid, void **heap) - { - FIXME("iface %p, file_mapping %p, iid %s, heap %p stub!\n", iface, file_mapping, debugstr_guid(iid), heap); -@@ -4338,7 +4544,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device9 *iface, - D3D12_RESIDENCY_FLAGS flags, UINT num_objects, ID3D12Pageable *const *objects, - ID3D12Fence *fence, UINT64 fence_value) - { -@@ -4349,7 +4555,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device8 - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device9 *iface, - UINT node_mask, D3D12_COMMAND_LIST_TYPE type, D3D12_COMMAND_LIST_FLAGS flags, - REFIID iid, void **command_list) - { -@@ -4359,7 +4565,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device8 * - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device9 *iface, - const D3D12_PROTECTED_RESOURCE_SESSION_DESC *desc, REFIID iid, void **session) - { - FIXME("iface %p, desc %p, iid %s, session %p stub!\n", iface, desc, debugstr_guid(iid), session); -@@ -4367,13 +4573,13 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3 - return E_NOTIMPL; - } - --static 
HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device9 *iface, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, - ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_resource *object; - HRESULT hr; -@@ -4395,11 +4601,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Dev - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device9 *iface, - const D3D12_HEAP_DESC *desc, ID3D12ProtectedResourceSession *protected_session, - REFIID iid, void **heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_heap *object; - HRESULT hr; - -@@ -4415,7 +4621,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device8 *iface, - return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device9 *iface, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, - ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) -@@ -4429,11 +4635,11 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateReservedResource1(ID3D12Devi - } - - static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo1( -- ID3D12Device8 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, -+ ID3D12Device9 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, - UINT count, const D3D12_RESOURCE_DESC *resource_descs, - D3D12_RESOURCE_ALLOCATION_INFO1 *info1) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", - iface, info, visible_mask, count, resource_descs, info1); -@@ -4445,7 +4651,7 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour - return info; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device9 *iface, - ID3D12LifetimeOwner *owner, REFIID iid, void **tracker) - { - FIXME("iface %p, owner %p, iid %s, tracker %p stub!\n", iface, owner, debugstr_guid(iid), tracker); -@@ -4453,12 +4659,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device - return E_NOTIMPL; - } - --static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device8 *iface) -+static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device9 *iface) - { - FIXME("iface %p stub!\n", iface); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device9 *iface, - UINT *num_meta_commands, D3D12_META_COMMAND_DESC *command_desc) - { - FIXME("iface %p, num_meta_commands %p, command_desc %p stub!\n", iface, -@@ -4467,7 +4673,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device - return E_NOTIMPL; - } - --static HRESULT 
STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device9 *iface, - REFGUID command_id, D3D12_META_COMMAND_PARAMETER_STAGE stage, - UINT *size_in_bytes, UINT *parameter_count, - D3D12_META_COMMAND_PARAMETER_DESC *parameter_desc) -@@ -4479,7 +4685,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3 - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device9 *iface, - REFGUID command_id, UINT node_mask, const void *parameters_data, - SIZE_T data_size_in_bytes, REFIID iid, void **meta_command) - { -@@ -4491,7 +4697,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device8 *i - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device9 *iface, - const D3D12_STATE_OBJECT_DESC *desc, REFIID iid, void **state_object) - { - FIXME("iface %p, desc %p, iid %s, state_object %p stub!\n", iface, desc, debugstr_guid(iid), state_object); -@@ -4499,14 +4705,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device8 *i - return E_NOTIMPL; - } - --static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device9 *iface, - const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc, - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO *info) - { - FIXME("iface %p, desc %p, info %p stub!\n", iface, desc, info); - } - --static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device8 *iface, -+static 
D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device9 *iface, - D3D12_SERIALIZED_DATA_TYPE data_type, const D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER *identifier) - { - FIXME("iface %p, data_type %u, identifier %p stub!\n", iface, data_type, identifier); -@@ -4514,7 +4720,7 @@ static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_Ch - return D3D12_DRIVER_MATCHING_IDENTIFIER_UNRECOGNIZED; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device9 *iface, - D3D12_BACKGROUND_PROCESSING_MODE mode, D3D12_MEASUREMENTS_ACTION action, HANDLE event, - BOOL *further_measurements_desired) - { -@@ -4524,7 +4730,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12 - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device8 *iface, -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device9 *iface, - const D3D12_STATE_OBJECT_DESC *addition, ID3D12StateObject *state_object_to_grow_from, - REFIID riid, void **new_state_object) - { -@@ -4534,7 +4741,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device8 *if - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device9 *iface, - const D3D12_PROTECTED_RESOURCE_SESSION_DESC1 *desc, REFIID riid, void **session) - { - FIXME("iface %p, desc %p, riid %s, session %p stub!\n", iface, desc, debugstr_guid(riid), session); -@@ -4542,11 +4749,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID - return E_NOTIMPL; - } - --static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE 
d3d12_device_GetResourceAllocationInfo2(ID3D12Device8 *iface, -+static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo2(ID3D12Device9 *iface, - D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, - const D3D12_RESOURCE_DESC1 *resource_descs, D3D12_RESOURCE_ALLOCATION_INFO1 *info1) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", - iface, info, visible_mask, count, resource_descs, info1); -@@ -4558,12 +4765,12 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour - return info; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2(ID3D12Device9 *iface, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC1 *desc, - D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, - ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_resource *object; - HRESULT hr; - -@@ -4582,12 +4789,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2(ID3D12Dev - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource1(ID3D12Device8 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource1(ID3D12Device9 *iface, - ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC1 *resource_desc, - D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, - REFIID iid, void 
**resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_heap *heap_object; - struct d3d12_resource *object; - HRESULT hr; -@@ -4606,19 +4813,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource1(ID3D12Device - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView(ID3D12Device9 *iface, - ID3D12Resource *target_resource, ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { - FIXME("iface %p, target_resource %p, feedback_resource %p, descriptor %s stub!\n", - iface, target_resource, feedback_resource, debug_cpu_handle(descriptor)); - } - --static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints1(ID3D12Device8 *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints1(ID3D12Device9 *iface, - const D3D12_RESOURCE_DESC1 *desc, UINT first_sub_resource, UINT sub_resource_count, - UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, - UINT64 *row_sizes, UINT64 *total_bytes) - { -- struct d3d12_device *device = impl_from_ID3D12Device8(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " - "layouts %p, row_counts %p, row_sizes %p, total_bytes %p.\n", -@@ -4629,7 +4836,79 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints1(ID3D12Device8 - base_offset, layouts, row_counts, row_sizes, total_bytes); - } - --static const struct ID3D12Device8Vtbl d3d12_device_vtbl = -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateShaderCacheSession(ID3D12Device9 *iface, -+ const D3D12_SHADER_CACHE_SESSION_DESC *desc, 
REFIID iid, void **session) -+{ -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); -+ struct d3d12_cache_session *object; -+ static const GUID guid_null = {0}; -+ HRESULT hr; -+ -+ static const UINT valid_flags = D3D12_SHADER_CACHE_FLAG_DRIVER_VERSIONED -+ | D3D12_SHADER_CACHE_FLAG_USE_WORKING_DIR; -+ -+ TRACE("iface %p, desc %p, iid %s, session %p.\n", iface, desc, debugstr_guid(iid), session); -+ -+ if (!desc || !memcmp(&desc->Identifier, &guid_null, sizeof(desc->Identifier))) -+ { -+ WARN("No description or identifier, returning E_INVALIDARG.\n"); -+ return E_INVALIDARG; -+ } -+ if (desc->MaximumValueFileSizeBytes > 1024 * 1024 * 1024) -+ { -+ WARN("Requested size is larger than 1GiB, returning E_INVALIDARG.\n"); -+ return E_INVALIDARG; -+ } -+ if (desc->Flags & ~valid_flags) -+ { -+ WARN("Invalid flags %#x, returning E_INVALIDARG.\n", desc->Flags); -+ return E_INVALIDARG; -+ } -+ if (desc->Mode != D3D12_SHADER_CACHE_MODE_MEMORY && desc->Mode != D3D12_SHADER_CACHE_MODE_DISK) -+ { -+ WARN("Invalid mode %#x, returning E_INVALIDARG.\n", desc->Mode); -+ return E_INVALIDARG; -+ } -+ if (!session) -+ { -+ WARN("No output pointer, returning S_FALSE.\n"); -+ return S_FALSE; -+ } -+ -+ if (!(object = vkd3d_malloc(sizeof(*object)))) -+ return E_OUTOFMEMORY; -+ -+ if (FAILED(hr = d3d12_cache_session_init(object, device, desc))) -+ { -+ vkd3d_free(object); -+ return hr; -+ } -+ -+ hr = ID3D12ShaderCacheSession_QueryInterface(&object->ID3D12ShaderCacheSession_iface, iid, -+ session); -+ ID3D12ShaderCacheSession_Release(&object->ID3D12ShaderCacheSession_iface); -+ return hr; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_ShaderCacheControl(ID3D12Device9 *iface, -+ D3D12_SHADER_CACHE_KIND_FLAGS kinds, D3D12_SHADER_CACHE_CONTROL_FLAGS control) -+{ -+ FIXME("iface %p, kinds %#x control %#x stub!\n", iface, kinds, control); -+ -+ return E_NOTIMPL; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue1(ID3D12Device9 *iface, -+ const 
D3D12_COMMAND_QUEUE_DESC *desc, REFIID creator_id, REFIID iid, -+ void **command_queue) -+{ -+ FIXME("iface %p, desc %p, creator %s, iid %s, queue %p stub!\n", iface, desc, -+ debugstr_guid(creator_id), debugstr_guid(iid), command_queue); -+ -+ return E_NOTIMPL; -+} -+ -+static const struct ID3D12Device9Vtbl d3d12_device_vtbl = - { - /* IUnknown methods */ - d3d12_device_QueryInterface, -@@ -4715,14 +4994,18 @@ static const struct ID3D12Device8Vtbl d3d12_device_vtbl = - d3d12_device_CreatePlacedResource1, - d3d12_device_CreateSamplerFeedbackUnorderedAccessView, - d3d12_device_GetCopyableFootprints1, -+ /* ID3D12Device9 methods */ -+ d3d12_device_CreateShaderCacheSession, -+ d3d12_device_ShaderCacheControl, -+ d3d12_device_CreateCommandQueue1, - }; - --struct d3d12_device *unsafe_impl_from_ID3D12Device8(ID3D12Device8 *iface) -+struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface) - { - if (!iface) - return NULL; - assert(iface->lpVtbl == &d3d12_device_vtbl); -- return impl_from_ID3D12Device8(iface); -+ return impl_from_ID3D12Device9(iface); - } - - static void *device_worker_main(void *arg) -@@ -4765,7 +5048,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, - const struct vkd3d_vk_device_procs *vk_procs; - HRESULT hr; - -- device->ID3D12Device8_iface.lpVtbl = &d3d12_device_vtbl; -+ device->ID3D12Device9_iface.lpVtbl = &d3d12_device_vtbl; - device->refcount = 1; - - vkd3d_instance_incref(device->vkd3d_instance = instance); -@@ -5013,28 +5296,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha - - IUnknown *vkd3d_get_device_parent(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device8((ID3D12Device8 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); - - return d3d12_device->parent; - } - - VkDevice vkd3d_get_vk_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = 
impl_from_ID3D12Device8((ID3D12Device8 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); - - return d3d12_device->vk_device; - } - - VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device8((ID3D12Device8 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); - - return d3d12_device->vk_physical_device; - } - - struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device8((ID3D12Device8 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); - - return d3d12_device->vkd3d_instance; - } -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 446ef3ab0db..b83a45d0606 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -2260,7 +2260,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, - HRESULT vkd3d_create_image_resource(ID3D12Device *device, - const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) - { -- struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device8((ID3D12Device8 *)device); -+ struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device9((ID3D12Device9 *)device); - struct d3d12_resource *object; - HRESULT hr; - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -index f6925d47bdf..e0345deaa0f 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -@@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, - - if (!device) - { -- ID3D12Device_Release(&object->ID3D12Device8_iface); -+ ID3D12Device_Release(&object->ID3D12Device9_iface); - return S_FALSE; - } - -- return return_interface(&object->ID3D12Device8_iface, 
&IID_ID3D12Device, iid, device); -+ return return_interface(&object->ID3D12Device9_iface, &IID_ID3D12Device, iid, device); - } - - /* ID3D12RootSignatureDeserializer */ -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index 39d892a6fa7..14c8eb54574 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -1669,7 +1669,7 @@ struct vkd3d_desc_object_cache - /* ID3D12Device */ - struct d3d12_device - { -- ID3D12Device8 ID3D12Device8_iface; -+ ID3D12Device9 ID3D12Device9_iface; - unsigned int refcount; - - VkDevice vk_device; -@@ -1744,29 +1744,29 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 - bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); - void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, - const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); --struct d3d12_device *unsafe_impl_from_ID3D12Device8(ID3D12Device8 *iface); -+struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface); - HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); - void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); - - static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) - { -- return ID3D12Device8_QueryInterface(&device->ID3D12Device8_iface, iid, object); -+ return ID3D12Device9_QueryInterface(&device->ID3D12Device9_iface, iid, object); - } - - static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) - { -- return ID3D12Device8_AddRef(&device->ID3D12Device8_iface); -+ return ID3D12Device9_AddRef(&device->ID3D12Device9_iface); - } - - static inline ULONG d3d12_device_release(struct d3d12_device *device) - { -- return ID3D12Device8_Release(&device->ID3D12Device8_iface); -+ return ID3D12Device9_Release(&device->ID3D12Device9_iface); - } 
- - static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) - { -- return ID3D12Device8_GetDescriptorHandleIncrementSize(&device->ID3D12Device8_iface, descriptor_type); -+ return ID3D12Device9_GetDescriptorHandleIncrementSize(&device->ID3D12Device9_iface, descriptor_type); - } - - /* utils */ --- -2.43.0 - diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-7d6f0f2592a8aedf749c2dff36ea330e9cc.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-7d6f0f2592a8aedf749c2dff36ea330e9cc.patch deleted file mode 100644 index dfc89e3d..00000000 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-7d6f0f2592a8aedf749c2dff36ea330e9cc.patch +++ /dev/null @@ -1,5531 +0,0 @@ -From 6d3321209413caa5f5a3d96bf3802552ee56602e Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Tue, 16 Apr 2024 12:05:29 +1000 -Subject: [PATCH] Updated vkd3d to 7d6f0f2592a8aedf749c2dff36ea330e9ccb49d1. - ---- - libs/vkd3d/Makefile.in | 1 + - libs/vkd3d/include/private/vkd3d_common.h | 6 + - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 528 +++--- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 135 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 158 ++ - libs/vkd3d/libs/vkd3d-shader/fx.c | 150 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 104 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 77 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 495 ++++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 134 +- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 23 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 1451 ++++++++++------- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 35 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 35 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 25 + - libs/vkd3d/libs/vkd3d/cache.c | 59 + - libs/vkd3d/libs/vkd3d/device.c | 62 + - libs/vkd3d/libs/vkd3d/resource.c | 12 + - libs/vkd3d/libs/vkd3d/state.c | 28 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 7 + - 20 files changed, 2329 insertions(+), 1196 deletions(-) - create mode 
100644 libs/vkd3d/libs/vkd3d/cache.c - -diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in -index 448e9a0e61d..94e4833dc9a 100644 ---- a/libs/vkd3d/Makefile.in -+++ b/libs/vkd3d/Makefile.in -@@ -30,6 +30,7 @@ SOURCES = \ - libs/vkd3d-shader/spirv.c \ - libs/vkd3d-shader/tpf.c \ - libs/vkd3d-shader/vkd3d_shader_main.c \ -+ libs/vkd3d/cache.c \ - libs/vkd3d/command.c \ - libs/vkd3d/device.c \ - libs/vkd3d/resource.c \ -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 1cc8ecc38f3..f9df47d339c 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -438,6 +438,12 @@ struct vkd3d_mutex - #endif - }; - -+#ifdef _WIN32 -+#define VKD3D_MUTEX_INITIALIZER {{NULL, -1, 0, 0, 0, 0}} -+#else -+#define VKD3D_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER -+#endif -+ - static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) - { - #ifdef _WIN32 -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 2b5feb94103..459fdfc9abf 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -330,37 +330,6 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_XOR ] = "xor", - }; - --static const struct --{ -- enum vkd3d_shader_input_sysval_semantic sysval_semantic; -- const char *sysval_name; --} --shader_input_sysval_semantic_names[] = --{ -- {VKD3D_SIV_POSITION, "position"}, -- {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, -- {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, -- {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, -- {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, -- {VKD3D_SIV_VERTEX_ID, "vertex_id"}, -- {VKD3D_SIV_INSTANCE_ID, "instance_id"}, -- {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, -- {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, -- {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, -- {VKD3D_SIV_QUAD_U0_TESS_FACTOR, 
"finalQuadUeq0EdgeTessFactor"}, -- {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, -- {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, -- {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, -- {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, -- {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, -- {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, -- {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, -- {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, -- {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, -- {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, -- {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, --}; -- - struct vkd3d_d3d_asm_colours - { - const char *reset; -@@ -615,21 +584,54 @@ static void shader_print_tessellator_partitioning(struct vkd3d_d3d_asm_compiler - vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, partitioning, suffix); - } - --static void shader_dump_shader_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_shader_input_sysval_semantic semantic) -+static void shader_print_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, enum vkd3d_shader_input_sysval_semantic semantic, const char *suffix) - { - unsigned int i; - -+ static const struct -+ { -+ enum vkd3d_shader_input_sysval_semantic sysval_semantic; -+ const char *sysval_name; -+ } -+ shader_input_sysval_semantic_names[] = -+ { -+ {VKD3D_SIV_POSITION, "position"}, -+ {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, -+ {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, -+ {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, -+ {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, -+ {VKD3D_SIV_VERTEX_ID, "vertex_id"}, -+ {VKD3D_SIV_INSTANCE_ID, "instance_id"}, -+ {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, -+ {VKD3D_SIV_IS_FRONT_FACE, 
"is_front_face"}, -+ {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, -+ {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, -+ {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, -+ {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, -+ {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, -+ {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, -+ {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, -+ {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, -+ {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, -+ {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, -+ {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, -+ {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, -+ {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, -+ }; -+ - for (i = 0; i < ARRAY_SIZE(shader_input_sysval_semantic_names); ++i) - { -- if (shader_input_sysval_semantic_names[i].sysval_semantic == semantic) -- { -- vkd3d_string_buffer_printf(&compiler->buffer, "%s", shader_input_sysval_semantic_names[i].sysval_name); -- return; -- } -+ if (shader_input_sysval_semantic_names[i].sysval_semantic != semantic) -+ continue; -+ -+ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", -+ prefix, shader_input_sysval_semantic_names[i].sysval_name, suffix); -+ return; - } - -- vkd3d_string_buffer_printf(&compiler->buffer, "unknown_shader_input_sysval_semantic(%#x)", semantic); -+ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, semantic, compiler->colours.reset, suffix); - } - - static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, enum vkd3d_shader_resource_type type) -@@ -704,124 +706,129 @@ static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compil - vkd3d_string_buffer_printf(&compiler->buffer, ")"); - } - --static void shader_dump_decl_usage(struct 
vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_semantic *semantic, uint32_t flags) -+static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_semantic *semantic, uint32_t flags, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ unsigned int usage_idx; -+ const char *usage; -+ bool indexed; - - if (semantic->resource.reg.reg.type == VKD3DSPR_COMBINED_SAMPLER) - { - switch (semantic->resource_type) - { - case VKD3D_SHADER_RESOURCE_TEXTURE_2D: -- shader_addline(buffer, "_2d"); -+ usage = "2d"; - break; -- - case VKD3D_SHADER_RESOURCE_TEXTURE_3D: -- shader_addline(buffer, "_volume"); -+ usage = "volume"; - break; -- - case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: -- shader_addline(buffer, "_cube"); -+ usage = "cube"; - break; -- - default: -- shader_addline(buffer, "_unknown_resource_type(%#x)", semantic->resource_type); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, semantic->resource_type, compiler->colours.reset, suffix); -+ return; - } -+ -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); -+ return; - } -- else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) -+ -+ if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) - { -+ vkd3d_string_buffer_printf(buffer, "%s", prefix); - if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE) -- shader_addline(buffer, "_resource"); -+ vkd3d_string_buffer_printf(buffer, "resource_"); - -- shader_addline(buffer, "_"); - shader_dump_resource_type(compiler, semantic->resource_type); - if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS - || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) - { -- shader_addline(buffer, "(%u)", semantic->sample_count); -+ vkd3d_string_buffer_printf(buffer, "(%u)", 
semantic->sample_count); - } - if (semantic->resource.reg.reg.type == VKD3DSPR_UAV) - shader_dump_uav_flags(compiler, flags); -- shader_addline(buffer, " "); -+ vkd3d_string_buffer_printf(buffer, " "); - shader_dump_resource_data_type(compiler, semantic->resource_data_type); -+ vkd3d_string_buffer_printf(buffer, "%s", suffix); -+ return; - } -- else -- { -- /* Pixel shaders 3.0 don't have usage semantics. */ -- if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) -- && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) -- return; -- else -- shader_addline(buffer, "_"); -- -- switch (semantic->usage) -- { -- case VKD3D_DECL_USAGE_POSITION: -- shader_addline(buffer, "position%u", semantic->usage_idx); -- break; -- -- case VKD3D_DECL_USAGE_BLEND_INDICES: -- shader_addline(buffer, "blend"); -- break; -- -- case VKD3D_DECL_USAGE_BLEND_WEIGHT: -- shader_addline(buffer, "weight"); -- break; -- -- case VKD3D_DECL_USAGE_NORMAL: -- shader_addline(buffer, "normal%u", semantic->usage_idx); -- break; -- -- case VKD3D_DECL_USAGE_PSIZE: -- shader_addline(buffer, "psize"); -- break; -- -- case VKD3D_DECL_USAGE_COLOR: -- if (!semantic->usage_idx) -- shader_addline(buffer, "color"); -- else -- shader_addline(buffer, "specular%u", (semantic->usage_idx - 1)); -- break; -- -- case VKD3D_DECL_USAGE_TEXCOORD: -- shader_addline(buffer, "texcoord%u", semantic->usage_idx); -- break; -- -- case VKD3D_DECL_USAGE_TANGENT: -- shader_addline(buffer, "tangent"); -- break; -- -- case VKD3D_DECL_USAGE_BINORMAL: -- shader_addline(buffer, "binormal"); -- break; -- -- case VKD3D_DECL_USAGE_TESS_FACTOR: -- shader_addline(buffer, "tessfactor"); -- break; -- -- case VKD3D_DECL_USAGE_POSITIONT: -- shader_addline(buffer, "positionT%u", semantic->usage_idx); -- break; -- -- case VKD3D_DECL_USAGE_FOG: -- shader_addline(buffer, "fog"); -- break; - -- case VKD3D_DECL_USAGE_DEPTH: -- shader_addline(buffer, "depth"); -- break; -+ /* Pixel shaders 3.0 don't have usage semantics. 
*/ -+ if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) -+ && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) -+ return; - -- case VKD3D_DECL_USAGE_SAMPLE: -- shader_addline(buffer, "sample"); -+ indexed = false; -+ usage_idx = semantic->usage_idx; -+ switch (semantic->usage) -+ { -+ case VKD3D_DECL_USAGE_POSITION: -+ usage = "position"; -+ indexed = true; -+ break; -+ case VKD3D_DECL_USAGE_BLEND_INDICES: -+ usage = "blend"; -+ break; -+ case VKD3D_DECL_USAGE_BLEND_WEIGHT: -+ usage = "weight"; -+ break; -+ case VKD3D_DECL_USAGE_NORMAL: -+ usage = "normal"; -+ indexed = true; -+ break; -+ case VKD3D_DECL_USAGE_PSIZE: -+ usage = "psize"; -+ break; -+ case VKD3D_DECL_USAGE_COLOR: -+ if (semantic->usage_idx) -+ { -+ usage = "specular"; -+ indexed = true; -+ --usage_idx; - break; -- -- default: -- shader_addline(buffer, "", semantic->usage); -- FIXME("Unrecognised semantic usage %#x.\n", semantic->usage); -- } -+ } -+ usage = "color"; -+ break; -+ case VKD3D_DECL_USAGE_TEXCOORD: -+ usage = "texcoord"; -+ indexed = true; -+ break; -+ case VKD3D_DECL_USAGE_TANGENT: -+ usage = "tangent"; -+ break; -+ case VKD3D_DECL_USAGE_BINORMAL: -+ usage = "binormal"; -+ break; -+ case VKD3D_DECL_USAGE_TESS_FACTOR: -+ usage = "tessfactor"; -+ break; -+ case VKD3D_DECL_USAGE_POSITIONT: -+ usage = "positionT"; -+ indexed = true; -+ break; -+ case VKD3D_DECL_USAGE_FOG: -+ usage = "fog"; -+ break; -+ case VKD3D_DECL_USAGE_DEPTH: -+ usage = "depth"; -+ break; -+ case VKD3D_DECL_USAGE_SAMPLE: -+ usage = "sample"; -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, semantic->usage, usage_idx, compiler->colours.reset, suffix); -+ return; - } -+ -+ if (indexed) -+ vkd3d_string_buffer_printf(buffer, "%s%s%u%s", prefix, usage, usage_idx, suffix); -+ else -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); - } - - static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, -@@ -937,8 +944,8 
@@ static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler - vkd3d_string_buffer_printf(&compiler->buffer, "*]"); - } - --static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg, -- bool is_declaration) -+static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, -+ const struct vkd3d_shader_register *reg, bool is_declaration, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; - unsigned int offset = reg->idx[0].offset; -@@ -947,22 +954,23 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"}; - static const char * const misctype_reg_names[] = {"vPos", "vFace"}; - -- shader_addline(buffer, "%s", reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg); -+ vkd3d_string_buffer_printf(buffer, "%s%s", prefix, -+ reg->type == VKD3DSPR_LABEL ? 
compiler->colours.label : compiler->colours.reg); - switch (reg->type) - { - case VKD3DSPR_TEMP: -- shader_addline(buffer, "r"); -+ vkd3d_string_buffer_printf(buffer, "r"); - break; - - case VKD3DSPR_INPUT: -- shader_addline(buffer, "v"); -+ vkd3d_string_buffer_printf(buffer, "v"); - break; - - case VKD3DSPR_CONST: - case VKD3DSPR_CONST2: - case VKD3DSPR_CONST3: - case VKD3DSPR_CONST4: -- shader_addline(buffer, "c"); -+ vkd3d_string_buffer_printf(buffer, "c"); - offset = shader_get_float_offset(reg->type, offset); - break; - -@@ -972,205 +980,202 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - break; - - case VKD3DSPR_RASTOUT: -- shader_addline(buffer, "%s", rastout_reg_names[offset]); -+ vkd3d_string_buffer_printf(buffer, "%s", rastout_reg_names[offset]); - break; - - case VKD3DSPR_COLOROUT: -- shader_addline(buffer, "oC"); -+ vkd3d_string_buffer_printf(buffer, "oC"); - break; - - case VKD3DSPR_DEPTHOUT: -- shader_addline(buffer, "oDepth"); -+ vkd3d_string_buffer_printf(buffer, "oDepth"); - break; - - case VKD3DSPR_DEPTHOUTGE: -- shader_addline(buffer, "oDepthGE"); -+ vkd3d_string_buffer_printf(buffer, "oDepthGE"); - break; - - case VKD3DSPR_DEPTHOUTLE: -- shader_addline(buffer, "oDepthLE"); -+ vkd3d_string_buffer_printf(buffer, "oDepthLE"); - break; - - case VKD3DSPR_ATTROUT: -- shader_addline(buffer, "oD"); -+ vkd3d_string_buffer_printf(buffer, "oD"); - break; - - case VKD3DSPR_TEXCRDOUT: - /* Vertex shaders >= 3.0 use general purpose output registers - * (VKD3DSPR_OUTPUT), which can include an address token. 
*/ - if (vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0)) -- shader_addline(buffer, "o"); -+ vkd3d_string_buffer_printf(buffer, "o"); - else -- shader_addline(buffer, "oT"); -+ vkd3d_string_buffer_printf(buffer, "oT"); - break; - - case VKD3DSPR_CONSTINT: -- shader_addline(buffer, "i"); -+ vkd3d_string_buffer_printf(buffer, "i"); - break; - - case VKD3DSPR_CONSTBOOL: -- shader_addline(buffer, "b"); -+ vkd3d_string_buffer_printf(buffer, "b"); - break; - - case VKD3DSPR_LABEL: -- shader_addline(buffer, "l"); -+ vkd3d_string_buffer_printf(buffer, "l"); - break; - - case VKD3DSPR_LOOP: -- shader_addline(buffer, "aL"); -+ vkd3d_string_buffer_printf(buffer, "aL"); - break; - - case VKD3DSPR_COMBINED_SAMPLER: - case VKD3DSPR_SAMPLER: -- shader_addline(buffer, "s"); -+ vkd3d_string_buffer_printf(buffer, "s"); - is_descriptor = true; - break; - - case VKD3DSPR_MISCTYPE: - if (offset > 1) -- { -- FIXME("Unhandled misctype register %u.\n", offset); -- shader_addline(buffer, "", offset); -- } -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, offset, compiler->colours.reset); - else -- { -- shader_addline(buffer, "%s", misctype_reg_names[offset]); -- } -+ vkd3d_string_buffer_printf(buffer, "%s", misctype_reg_names[offset]); - break; - - case VKD3DSPR_PREDICATE: -- shader_addline(buffer, "p"); -+ vkd3d_string_buffer_printf(buffer, "p"); - break; - - case VKD3DSPR_IMMCONST: -- shader_addline(buffer, "l"); -+ vkd3d_string_buffer_printf(buffer, "l"); - break; - - case VKD3DSPR_IMMCONST64: -- shader_addline(buffer, "d"); -+ vkd3d_string_buffer_printf(buffer, "d"); - break; - - case VKD3DSPR_CONSTBUFFER: -- shader_addline(buffer, "cb"); -+ vkd3d_string_buffer_printf(buffer, "cb"); - is_descriptor = true; - break; - - case VKD3DSPR_IMMCONSTBUFFER: -- shader_addline(buffer, "icb"); -+ vkd3d_string_buffer_printf(buffer, "icb"); - break; - - case VKD3DSPR_PRIMID: -- shader_addline(buffer, "primID"); -+ vkd3d_string_buffer_printf(buffer, "primID"); - break; - 
- case VKD3DSPR_NULL: -- shader_addline(buffer, "null"); -+ vkd3d_string_buffer_printf(buffer, "null"); - break; - - case VKD3DSPR_RASTERIZER: -- shader_addline(buffer, "rasterizer"); -+ vkd3d_string_buffer_printf(buffer, "rasterizer"); - break; - - case VKD3DSPR_RESOURCE: -- shader_addline(buffer, "t"); -+ vkd3d_string_buffer_printf(buffer, "t"); - is_descriptor = true; - break; - - case VKD3DSPR_UAV: -- shader_addline(buffer, "u"); -+ vkd3d_string_buffer_printf(buffer, "u"); - is_descriptor = true; - break; - - case VKD3DSPR_OUTPOINTID: -- shader_addline(buffer, "vOutputControlPointID"); -+ vkd3d_string_buffer_printf(buffer, "vOutputControlPointID"); - break; - - case VKD3DSPR_FORKINSTID: -- shader_addline(buffer, "vForkInstanceId"); -+ vkd3d_string_buffer_printf(buffer, "vForkInstanceId"); - break; - - case VKD3DSPR_JOININSTID: -- shader_addline(buffer, "vJoinInstanceId"); -+ vkd3d_string_buffer_printf(buffer, "vJoinInstanceId"); - break; - - case VKD3DSPR_INCONTROLPOINT: -- shader_addline(buffer, "vicp"); -+ vkd3d_string_buffer_printf(buffer, "vicp"); - break; - - case VKD3DSPR_OUTCONTROLPOINT: -- shader_addline(buffer, "vocp"); -+ vkd3d_string_buffer_printf(buffer, "vocp"); - break; - - case VKD3DSPR_PATCHCONST: -- shader_addline(buffer, "vpc"); -+ vkd3d_string_buffer_printf(buffer, "vpc"); - break; - - case VKD3DSPR_TESSCOORD: -- shader_addline(buffer, "vDomainLocation"); -+ vkd3d_string_buffer_printf(buffer, "vDomainLocation"); - break; - - case VKD3DSPR_GROUPSHAREDMEM: -- shader_addline(buffer, "g"); -+ vkd3d_string_buffer_printf(buffer, "g"); - break; - - case VKD3DSPR_THREADID: -- shader_addline(buffer, "vThreadID"); -+ vkd3d_string_buffer_printf(buffer, "vThreadID"); - break; - - case VKD3DSPR_THREADGROUPID: -- shader_addline(buffer, "vThreadGroupID"); -+ vkd3d_string_buffer_printf(buffer, "vThreadGroupID"); - break; - - case VKD3DSPR_LOCALTHREADID: -- shader_addline(buffer, "vThreadIDInGroup"); -+ vkd3d_string_buffer_printf(buffer, "vThreadIDInGroup"); 
- break; - - case VKD3DSPR_LOCALTHREADINDEX: -- shader_addline(buffer, "vThreadIDInGroupFlattened"); -+ vkd3d_string_buffer_printf(buffer, "vThreadIDInGroupFlattened"); - break; - - case VKD3DSPR_IDXTEMP: -- shader_addline(buffer, "x"); -+ vkd3d_string_buffer_printf(buffer, "x"); - break; - - case VKD3DSPR_STREAM: -- shader_addline(buffer, "m"); -+ vkd3d_string_buffer_printf(buffer, "m"); - break; - - case VKD3DSPR_FUNCTIONBODY: -- shader_addline(buffer, "fb"); -+ vkd3d_string_buffer_printf(buffer, "fb"); - break; - - case VKD3DSPR_FUNCTIONPOINTER: -- shader_addline(buffer, "fp"); -+ vkd3d_string_buffer_printf(buffer, "fp"); - break; - - case VKD3DSPR_COVERAGE: -- shader_addline(buffer, "vCoverage"); -+ vkd3d_string_buffer_printf(buffer, "vCoverage"); - break; - - case VKD3DSPR_SAMPLEMASK: -- shader_addline(buffer, "oMask"); -+ vkd3d_string_buffer_printf(buffer, "oMask"); - break; - - case VKD3DSPR_GSINSTID: -- shader_addline(buffer, "vGSInstanceID"); -+ vkd3d_string_buffer_printf(buffer, "vGSInstanceID"); - break; - - case VKD3DSPR_OUTSTENCILREF: -- shader_addline(buffer, "oStencilRef"); -+ vkd3d_string_buffer_printf(buffer, "oStencilRef"); - break; - - case VKD3DSPR_UNDEF: -- shader_addline(buffer, "undef"); -+ vkd3d_string_buffer_printf(buffer, "undef"); - break; - - case VKD3DSPR_SSA: -- shader_addline(buffer, "sr"); -+ vkd3d_string_buffer_printf(buffer, "sr"); - break; - - default: -- shader_addline(buffer, "", reg->type); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->type, compiler->colours.reset); - break; - } - -@@ -1189,7 +1194,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - break; - } - -- shader_addline(buffer, "%s(", compiler->colours.reset); -+ vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); - switch (reg->dimension) - { - case VSIR_DIMENSION_SCALAR: -@@ -1210,7 +1215,8 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - 
shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); - break; - default: -- shader_addline(buffer, "", reg->data_type); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->data_type, compiler->colours.reset); - break; - } - break; -@@ -1249,20 +1255,22 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[3], ""); - break; - default: -- shader_addline(buffer, "", reg->data_type); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->data_type, compiler->colours.reset); - break; - } - break; - - default: -- shader_addline(buffer, "", reg->dimension); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->dimension, compiler->colours.reset); - break; - } -- shader_addline(buffer, ")"); -+ vkd3d_string_buffer_printf(buffer, ")"); - } - else if (reg->type == VKD3DSPR_IMMCONST64) - { -- shader_addline(buffer, "%s(", compiler->colours.reset); -+ vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); - /* A double2 vector is treated as a float4 vector in enum vsir_dimension. 
*/ - if (reg->dimension == VSIR_DIMENSION_SCALAR || reg->dimension == VSIR_DIMENSION_VEC4) - { -@@ -1280,14 +1288,16 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - } - else - { -- shader_addline(buffer, "", reg->data_type); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->data_type, compiler->colours.reset); - } - } - else - { -- shader_addline(buffer, "", reg->dimension); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->dimension, compiler->colours.reset); - } -- shader_addline(buffer, ")"); -+ vkd3d_string_buffer_printf(buffer, ")"); - } - else if (reg->type != VKD3DSPR_RASTOUT - && reg->type != VKD3DSPR_MISCTYPE -@@ -1331,7 +1341,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - } - else - { -- shader_addline(buffer, "%s", compiler->colours.reset); -+ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); - } - - if (reg->type == VKD3DSPR_FUNCTIONPOINTER) -@@ -1339,8 +1349,9 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - } - else - { -- shader_addline(buffer, "%s", compiler->colours.reset); -+ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); - } -+ vkd3d_string_buffer_printf(buffer, "%s", suffix); - } - - static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg) -@@ -1384,8 +1395,8 @@ static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, co - compiler->colours.modifier, compiler->colours.reset); - } - --static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_register *reg) -+static void shader_print_reg_type(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_register *reg, const char *suffix) - { - static const char *dimensions[] = - { -@@ -1398,7 +1409,10 @@ static void 
shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, - const char *dimension; - - if (!(compiler->flags & VSIR_ASM_FLAG_DUMP_TYPES)) -+ { -+ vkd3d_string_buffer_printf(buffer, "%s%s", prefix, suffix); - return; -+ } - - if (reg->data_type == VKD3D_DATA_UNUSED) - return; -@@ -1408,9 +1422,9 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, - else - dimension = "??"; - -- shader_addline(buffer, " <%s", dimension); -+ vkd3d_string_buffer_printf(buffer, "%s <%s", prefix, dimension); - shader_dump_data_type(compiler, reg->data_type); -- shader_addline(buffer, ">"); -+ vkd3d_string_buffer_printf(buffer, ">%s", suffix); - } - - static void shader_print_write_mask(struct vkd3d_d3d_asm_compiler *compiler, -@@ -1444,8 +1458,7 @@ static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler, - { - uint32_t write_mask = param->write_mask; - -- vkd3d_string_buffer_printf(&compiler->buffer, "%s", prefix); -- shader_dump_register(compiler, ¶m->reg, is_declaration); -+ shader_print_register(compiler, prefix, ¶m->reg, is_declaration, ""); - - if (write_mask && param->reg.dimension == VSIR_DIMENSION_VEC4) - { -@@ -1457,8 +1470,7 @@ static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler, - - shader_print_precision(compiler, ¶m->reg); - shader_print_non_uniform(compiler, ¶m->reg); -- shader_dump_reg_type(compiler, ¶m->reg); -- vkd3d_string_buffer_printf(&compiler->buffer, "%s", suffix); -+ shader_print_reg_type(compiler, "", ¶m->reg, suffix); - } - - static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, -@@ -1468,6 +1480,7 @@ static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, - struct vkd3d_string_buffer *buffer = &compiler->buffer; - uint32_t swizzle = param->swizzle; - const char *modifier = ""; -+ bool is_abs = false; - - if (src_modifier == VKD3DSPSM_NEG - || src_modifier == VKD3DSPSM_BIASNEG -@@ -1482,9 +1495,9 @@ static void shader_print_src_param(struct 
vkd3d_d3d_asm_compiler *compiler, - vkd3d_string_buffer_printf(buffer, "%s%s", prefix, modifier); - - if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) -- vkd3d_string_buffer_printf(buffer, "|"); -+ is_abs = true; - -- shader_dump_register(compiler, ¶m->reg, false); -+ shader_print_register(compiler, is_abs ? "|" : "", ¶m->reg, false, ""); - - switch (src_modifier) - { -@@ -1543,13 +1556,12 @@ static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, - swizzle_chars[swizzle_z], swizzle_chars[swizzle_w], compiler->colours.reset); - } - -- if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) -+ if (is_abs) - vkd3d_string_buffer_printf(buffer, "|"); - - shader_print_precision(compiler, ¶m->reg); - shader_print_non_uniform(compiler, ¶m->reg); -- shader_dump_reg_type(compiler, ¶m->reg); -- vkd3d_string_buffer_printf(buffer, "%s", suffix); -+ shader_print_reg_type(compiler, "", ¶m->reg, suffix); - } - - static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, -@@ -1578,87 +1590,93 @@ static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, - if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask); - } - --static void shader_dump_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_primitive_type *primitive_type) -+static void shader_print_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_primitive_type *p, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ const char *primitive_type; - -- switch (primitive_type->type) -+ switch (p->type) - { - case VKD3D_PT_UNDEFINED: -- shader_addline(buffer, "undefined"); -+ primitive_type = "undefined"; - break; - case VKD3D_PT_POINTLIST: -- shader_addline(buffer, "pointlist"); -+ primitive_type = "pointlist"; - break; - case VKD3D_PT_LINELIST: -- shader_addline(buffer, "linelist"); -+ primitive_type = "linelist"; - 
break; - case VKD3D_PT_LINESTRIP: -- shader_addline(buffer, "linestrip"); -+ primitive_type = "linestrip"; - break; - case VKD3D_PT_TRIANGLELIST: -- shader_addline(buffer, "trianglelist"); -+ primitive_type = "trianglelist"; - break; - case VKD3D_PT_TRIANGLESTRIP: -- shader_addline(buffer, "trianglestrip"); -+ primitive_type = "trianglestrip"; - break; - case VKD3D_PT_TRIANGLEFAN: -- shader_addline(buffer, "trianglefan"); -+ primitive_type = "trianglefan"; - break; - case VKD3D_PT_LINELIST_ADJ: -- shader_addline(buffer, "linelist_adj"); -+ primitive_type = "linelist_adj"; - break; - case VKD3D_PT_LINESTRIP_ADJ: -- shader_addline(buffer, "linestrip_adj"); -+ primitive_type = "linestrip_adj"; - break; - case VKD3D_PT_TRIANGLELIST_ADJ: -- shader_addline(buffer, "trianglelist_adj"); -+ primitive_type = "trianglelist_adj"; - break; - case VKD3D_PT_TRIANGLESTRIP_ADJ: -- shader_addline(buffer, "trianglestrip_adj"); -+ primitive_type = "trianglestrip_adj"; - break; - case VKD3D_PT_PATCH: -- shader_addline(buffer, "patch%u", primitive_type->patch_vertex_count); -- break; -+ vkd3d_string_buffer_printf(buffer, "%spatch%u%s", prefix, p->patch_vertex_count, suffix); -+ return; - default: -- shader_addline(buffer, "", primitive_type->type); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, p->type, compiler->colours.reset, suffix); -+ return; - } -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive_type, suffix); - } - --static void shader_dump_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_shader_interpolation_mode interpolation_mode) -+static void shader_print_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, enum vkd3d_shader_interpolation_mode m, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ const char *mode; - -- switch (interpolation_mode) -+ switch (m) - { - case VKD3DSIM_CONSTANT: -- shader_addline(buffer, 
"constant"); -+ mode = "constant"; - break; - case VKD3DSIM_LINEAR: -- shader_addline(buffer, "linear"); -+ mode = "linear"; - break; - case VKD3DSIM_LINEAR_CENTROID: -- shader_addline(buffer, "linear centroid"); -+ mode = "linear centroid"; - break; - case VKD3DSIM_LINEAR_NOPERSPECTIVE: -- shader_addline(buffer, "linear noperspective"); -+ mode = "linear noperspective"; - break; - case VKD3DSIM_LINEAR_SAMPLE: -- shader_addline(buffer, "linear sample"); -+ mode = "linear sample"; - break; - case VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID: -- shader_addline(buffer, "linear noperspective centroid"); -+ mode = "linear noperspective centroid"; - break; - case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: -- shader_addline(buffer, "linear noperspective sample"); -+ mode = "linear noperspective sample"; - break; - default: -- shader_addline(buffer, "", interpolation_mode); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, m, compiler->colours.reset, suffix); -+ return; - } -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, mode, suffix); - } - - const char *shader_get_type_prefix(enum vkd3d_shader_type type) -@@ -1849,16 +1867,15 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_UAV_TYPED: - vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.opcode); -- shader_dump_decl_usage(compiler, &ins->declaration.semantic, ins->flags); -+ shader_print_dcl_usage(compiler, "_", &ins->declaration.semantic, ins->flags, ""); - shader_dump_ins_modifiers(compiler, &ins->declaration.semantic.resource.reg); -- vkd3d_string_buffer_printf(buffer, "%s ", compiler->colours.reset); -- shader_dump_register(compiler, &ins->declaration.semantic.resource.reg.reg, true); -+ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); -+ shader_print_register(compiler, " ", &ins->declaration.semantic.resource.reg.reg, true, ""); - shader_dump_register_space(compiler, 
ins->declaration.semantic.resource.range.space); - break; - - case VKD3DSIH_DCL_CONSTANT_BUFFER: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_register(compiler, &ins->declaration.cb.src.reg, true); -+ shader_print_register(compiler, " ", &ins->declaration.cb.src.reg, true, ""); - if (vkd3d_shader_ver_ge(&compiler->shader_version, 6, 0)) - shader_print_subscript(compiler, ins->declaration.cb.size, NULL); - else if (vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1)) -@@ -1906,8 +1923,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3DSIH_DCL_INPUT_PS: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_interpolation_mode(compiler, ins->flags); -+ shader_print_interpolation_mode(compiler, " ", ins->flags, ""); - shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); - break; - -@@ -1916,16 +1932,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - case VKD3DSIH_DCL_INPUT_SIV: - case VKD3DSIH_DCL_OUTPUT_SIV: - shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); -- shader_addline(buffer, ", "); -- shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); -+ shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); - break; - - case VKD3DSIH_DCL_INPUT_PS_SIV: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_interpolation_mode(compiler, ins->flags); -+ shader_print_interpolation_mode(compiler, " ", ins->flags, ""); - shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); -- shader_addline(buffer, ", "); -- shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); -+ shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); - break; - - case VKD3DSIH_DCL_INPUT: -@@ -1935,8 
+1948,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - - case VKD3DSIH_DCL_INPUT_PRIMITIVE: - case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_primitive_type(compiler, &ins->declaration.primitive_type); -+ shader_print_primitive_type(compiler, " ", &ins->declaration.primitive_type, ""); - break; - - case VKD3DSIH_DCL_INTERFACE: -@@ -1958,10 +1970,8 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3DSIH_DCL_SAMPLER: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_register(compiler, &ins->declaration.sampler.src.reg, true); -- if (ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE) -- shader_addline(buffer, ", comparisonMode"); -+ shader_print_register(compiler, " ", &ins->declaration.sampler.src.reg, true, -+ ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE ? ", comparisonMode" : ""); - shader_dump_register_space(compiler, ins->declaration.sampler.range.space); - break; - -@@ -2354,6 +2364,10 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - break; - - case VKD3DSIH_LABEL: -+ case VKD3DSIH_HS_DECLS: -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: - indent = 0; - break; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 099729fbb6c..4685afa082d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1522,63 +1522,94 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - - D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - { -- switch (type->base_type) -+ switch (type->class) - { -- case HLSL_TYPE_BOOL: -- return D3DXPT_BOOL; -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- return D3DXPT_FLOAT; -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- return D3DXPT_INT; -- case HLSL_TYPE_PIXELSHADER: -- return D3DXPT_PIXELSHADER; -- 
case HLSL_TYPE_SAMPLER: -- switch (type->sampler_dim) -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ switch (type->base_type) - { -- case HLSL_SAMPLER_DIM_1D: -- return D3DXPT_SAMPLER1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3DXPT_SAMPLER2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3DXPT_SAMPLER3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return D3DXPT_SAMPLERCUBE; -- case HLSL_SAMPLER_DIM_GENERIC: -- return D3DXPT_SAMPLER; -+ case HLSL_TYPE_BOOL: -+ return D3DXPT_BOOL; -+ /* Actually double behaves differently depending on DLL version: -+ * For <= 36, it maps to D3DXPT_FLOAT. -+ * For 37-40, it maps to zero (D3DXPT_VOID). -+ * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* -+ * values are mostly compatible with D3DXPT_*). -+ * However, the latter two cases look like bugs, and a reasonable -+ * application certainly wouldn't know what to do with them. -+ * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */ -+ case HLSL_TYPE_DOUBLE: -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ return D3DXPT_FLOAT; -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ return D3DXPT_INT; - default: -- ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } -- break; -- case HLSL_TYPE_STRING: -- return D3DXPT_STRING; -- case HLSL_TYPE_TEXTURE: -- switch (type->sampler_dim) -+ -+ case HLSL_CLASS_OBJECT: -+ switch (type->base_type) - { -- case HLSL_SAMPLER_DIM_1D: -- return D3DXPT_TEXTURE1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3DXPT_TEXTURE2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3DXPT_TEXTURE3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return D3DXPT_TEXTURECUBE; -- case HLSL_SAMPLER_DIM_GENERIC: -- return D3DXPT_TEXTURE; -+ case HLSL_TYPE_PIXELSHADER: -+ return D3DXPT_PIXELSHADER; -+ case HLSL_TYPE_SAMPLER: -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return D3DXPT_SAMPLER1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return D3DXPT_SAMPLER2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return 
D3DXPT_SAMPLER3D; -+ case HLSL_SAMPLER_DIM_CUBE: -+ return D3DXPT_SAMPLERCUBE; -+ case HLSL_SAMPLER_DIM_GENERIC: -+ return D3DXPT_SAMPLER; -+ default: -+ ERR("Invalid dimension %#x.\n", type->sampler_dim); -+ vkd3d_unreachable(); -+ } -+ break; -+ case HLSL_TYPE_STRING: -+ return D3DXPT_STRING; -+ case HLSL_TYPE_TEXTURE: -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return D3DXPT_TEXTURE1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return D3DXPT_TEXTURE2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return D3DXPT_TEXTURE3D; -+ case HLSL_SAMPLER_DIM_CUBE: -+ return D3DXPT_TEXTURECUBE; -+ case HLSL_SAMPLER_DIM_GENERIC: -+ return D3DXPT_TEXTURE; -+ default: -+ ERR("Invalid dimension %#x.\n", type->sampler_dim); -+ vkd3d_unreachable(); -+ } -+ break; -+ case HLSL_TYPE_VERTEXSHADER: -+ return D3DXPT_VERTEXSHADER; -+ case HLSL_TYPE_VOID: -+ return D3DXPT_VOID; - default: -- ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } -- break; -- case HLSL_TYPE_VERTEXSHADER: -- return D3DXPT_VERTEXSHADER; -- case HLSL_TYPE_VOID: -- return D3DXPT_VOID; -- default: - vkd3d_unreachable(); -+ -+ case HLSL_CLASS_ARRAY: -+ return hlsl_sm1_base_type(type->e.array.type); -+ -+ case HLSL_CLASS_STRUCT: -+ return D3DXPT_VOID; - } -+ -+ vkd3d_unreachable(); - } - - static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) -@@ -2572,19 +2603,11 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - { - if (instr->data_type) - { -- if (instr->data_type->class == HLSL_CLASS_MATRIX) -+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) - { -- /* These need to be lowered. 
*/ -- hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); -- continue; -- } -- else if (instr->data_type->class == HLSL_CLASS_OBJECT) -- { -- hlsl_fixme(ctx, &instr->loc, "Object copy."); -+ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); - break; - } -- -- assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); - } - - switch (instr->type) -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index b5a61d99d3f..da8ba662dbc 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -404,6 +404,7 @@ enum dx_intrinsic_opcode - DX_ATOMIC_BINOP = 78, - DX_ATOMIC_CMP_XCHG = 79, - DX_BARRIER = 80, -+ DX_CALCULATE_LOD = 81, - DX_DISCARD = 82, - DX_DERIV_COARSEX = 83, - DX_DERIV_COARSEY = 84, -@@ -2885,6 +2886,122 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co - return VKD3D_OK; - } - -+static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, const struct dxil_record *record, -+ struct sm6_value *dst) -+{ -+ const struct sm6_type *elem_type, *pointee_type, *gep_type, *ptr_type; -+ struct sm6_value *operands[3]; -+ unsigned int i, j, offset; -+ uint64_t value; -+ -+ i = 0; -+ pointee_type = (record->operand_count & 1) ? 
sm6_parser_get_type(sm6, record->operands[i++]) : NULL; -+ -+ if (!dxil_record_validate_operand_count(record, i + 6, i + 6, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ for (j = 0; i < record->operand_count; i += 2, ++j) -+ { -+ if (!(elem_type = sm6_parser_get_type(sm6, record->operands[i]))) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if ((value = record->operands[i + 1]) >= sm6->cur_max_value) -+ { -+ WARN("Invalid value index %"PRIu64".\n", value); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid value index %"PRIu64".", value); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ else if (value == sm6->value_count) -+ { -+ WARN("Invalid value self-reference at %"PRIu64".\n", value); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid value self-reference for a constexpr GEP."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ operands[j] = &sm6->values[value]; -+ if (value > sm6->value_count) -+ { -+ operands[j]->type = elem_type; -+ } -+ else if (operands[j]->type != elem_type) -+ { -+ WARN("Type mismatch.\n"); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, -+ "Type mismatch in constexpr GEP elements."); -+ } -+ } -+ -+ if (operands[0]->u.reg.idx_count > 1) -+ { -+ WARN("Unsupported stacked GEP.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "A GEP instruction on the result of a previous GEP is unsupported."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!sm6_value_is_constant_zero(operands[1])) -+ { -+ WARN("Expected constant zero.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "The pointer dereference index for a constexpr GEP instruction is not constant zero."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!sm6_value_is_constant(operands[2]) || !sm6_type_is_integer(operands[2]->type)) -+ { -+ WARN("Element index is not constant int.\n"); -+ 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "A constexpr GEP element index is not a constant integer."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ dst->structure_stride = operands[0]->structure_stride; -+ -+ ptr_type = operands[0]->type; -+ if (!sm6_type_is_pointer(ptr_type)) -+ { -+ WARN("Constexpr GEP base value is not a pointer.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "A constexpr GEP base value is not a pointer."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!pointee_type) -+ { -+ pointee_type = ptr_type->u.pointer.type; -+ } -+ else if (pointee_type != ptr_type->u.pointer.type) -+ { -+ WARN("Explicit pointee type mismatch.\n"); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, -+ "Explicit pointee type for constexpr GEP does not match the element type."); -+ } -+ -+ offset = sm6_value_get_constant_uint(operands[2]); -+ if (!(gep_type = sm6_type_get_element_type_at_index(pointee_type, offset))) -+ { -+ WARN("Failed to get element type.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Failed to get the element type of a constexpr GEP."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!(dst->type = sm6_type_get_pointer_to_type(gep_type, ptr_type->u.pointer.addr_space, sm6))) -+ { -+ WARN("Failed to get pointer type for type %u.\n", gep_type->class); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "Module does not define a pointer type for a constexpr GEP result."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ dst->u.reg = operands[0]->u.reg; -+ dst->u.reg.idx[1].offset = offset; -+ dst->u.reg.idx[1].is_in_bounds = record->code == CST_CODE_CE_INBOUNDS_GEP; -+ dst->u.reg.idx_count = 2; -+ -+ return VKD3D_OK; -+} -+ - static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) - { - enum vkd3d_shader_register_type 
reg_type = VKD3DSPR_INVALID; -@@ -3005,6 +3122,12 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - - break; - -+ case CST_CODE_CE_GEP: -+ case CST_CODE_CE_INBOUNDS_GEP: -+ if ((ret = sm6_parser_init_constexpr_gep(sm6, record, dst)) < 0) -+ return ret; -+ break; -+ - case CST_CODE_UNDEF: - dxil_record_validate_operand_max_count(record, 0, sm6); - dst->u.reg.type = VKD3DSPR_UNDEF; -@@ -4364,6 +4487,40 @@ static void sm6_parser_emit_dx_buffer_update_counter(struct sm6_parser *sm6, enu - instruction_dst_param_init_ssa_scalar(ins, sm6); - } - -+static void sm6_parser_emit_dx_calculate_lod(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ const struct sm6_value *resource, *sampler; -+ struct vkd3d_shader_src_param *src_params; -+ struct vkd3d_shader_instruction *ins; -+ struct vkd3d_shader_register coord; -+ unsigned int clamp; -+ -+ resource = operands[0]; -+ sampler = operands[1]; -+ if (!sm6_value_validate_is_texture_handle(resource, op, sm6) -+ || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) -+ { -+ return; -+ } -+ -+ if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], 3, NULL, state, &coord)) -+ return; -+ -+ clamp = sm6_value_get_constant_uint(operands[5]); -+ -+ ins = state->ins; -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LOD); -+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -+ return; -+ src_param_init_vector_from_reg(&src_params[0], &coord); -+ src_params[1].reg = resource->u.handle.reg; -+ src_param_init_scalar(&src_params[1], !clamp); -+ src_param_init_vector_from_reg(&src_params[2], &sampler->u.handle.reg); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -5392,6 +5549,7 @@ static const struct 
sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load}, - [DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store}, - [DX_BUFFER_UPDATE_COUNTER ] = {"i", "H8", sm6_parser_emit_dx_buffer_update_counter}, -+ [DX_CALCULATE_LOD ] = {"f", "HHfffb", sm6_parser_emit_dx_calculate_lod}, - [DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, - [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, - [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 466908cd82b..1d90cd70e03 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -115,6 +115,9 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) - - static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) - { -+ if (var->state_block_count) -+ hlsl_fixme(fx->ctx, &var->loc, "Write state block assignments."); -+ - fx->ops->write_pass(var, fx); - } - -@@ -401,14 +404,6 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - uint32_t name_offset, offset, size, stride, numeric_desc; - uint32_t elements_count = 0; - const char *name; -- static const uint32_t variable_type[] = -- { -- [HLSL_CLASS_SCALAR] = 1, -- [HLSL_CLASS_VECTOR] = 1, -- [HLSL_CLASS_MATRIX] = 1, -- [HLSL_CLASS_OBJECT] = 2, -- [HLSL_CLASS_STRUCT] = 3, -- }; - struct hlsl_ctx *ctx = fx->ctx; - - /* Resolve arrays to element type and number of elements. 
*/ -@@ -428,13 +423,19 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: -+ put_u32_unaligned(buffer, 1); -+ break; -+ - case HLSL_CLASS_OBJECT: -+ put_u32_unaligned(buffer, 2); -+ break; -+ - case HLSL_CLASS_STRUCT: -- put_u32_unaligned(buffer, variable_type[type->class]); -+ put_u32_unaligned(buffer, 3); - break; -- default: -- hlsl_fixme(ctx, &ctx->location, "Writing type class %u is not implemented.", type->class); -- return 0; -+ -+ case HLSL_CLASS_ARRAY: -+ vkd3d_unreachable(); - } - - size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); -@@ -630,7 +631,6 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - { - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; - uint32_t semantic_offset, offset, elements_count = 0, name_offset; -- struct hlsl_ctx *ctx = fx->ctx; - size_t i; - - /* Resolve arrays to element type and number of elements. */ -@@ -643,22 +643,6 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - name_offset = write_string(name, fx); - semantic_offset = write_string(semantic->name, fx); - -- switch (type->base_type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- case HLSL_TYPE_VOID: -- case HLSL_TYPE_TEXTURE: -- break; -- default: -- hlsl_fixme(ctx, &ctx->location, "Writing parameter type %u is not implemented.", -- type->base_type); -- return 0; -- }; -- - offset = put_u32(buffer, hlsl_sm1_base_type(type)); - put_u32(buffer, hlsl_sm1_class(type)); - put_u32(buffer, name_offset); -@@ -688,6 +672,9 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - for (i = 0; i < type->e.record.field_count; ++i) - { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; -+ -+ /* Validated in check_invalid_object_fields(). 
*/ -+ assert(hlsl_is_numeric_type(field->type)); - write_fx_2_parameter(field->type, field->name, &field->semantic, fx); - } - } -@@ -746,7 +733,7 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f - { - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; - const struct hlsl_type *type = var->data_type; -- uint32_t i, offset, size, elements_count = 1; -+ uint32_t offset, size, elements_count = 1; - - size = get_fx_2_type_size(type); - -@@ -756,63 +743,80 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f - type = hlsl_get_multiarray_element_type(type); - } - -- if (type->class == HLSL_CLASS_OBJECT) -- { -- /* Objects are given sequential ids. */ -- offset = put_u32(buffer, fx->object_variable_count++); -- for (i = 1; i < elements_count; ++i) -- put_u32(buffer, fx->object_variable_count++); -- } -- else -+ /* Note that struct fields must all be numeric; -+ * this was validated in check_invalid_object_fields(). */ -+ switch (type->class) - { -- /* FIXME: write actual initial value */ -- offset = put_u32(buffer, 0); -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ case HLSL_CLASS_STRUCT: -+ /* FIXME: write actual initial value */ -+ offset = put_u32(buffer, 0); - -- for (i = 1; i < size / sizeof(uint32_t); ++i) -- put_u32(buffer, 0); -+ for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) -+ put_u32(buffer, 0); -+ break; -+ -+ default: -+ /* Objects are given sequential ids. 
*/ -+ offset = put_u32(buffer, fx->object_variable_count++); -+ for (uint32_t i = 1; i < elements_count; ++i) -+ put_u32(buffer, fx->object_variable_count++); -+ break; - } - - return offset; - } - --static bool is_type_supported_fx_2(const struct hlsl_type *type) -+static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type *type, -+ const struct vkd3d_shader_location *loc) - { -- type = hlsl_get_multiarray_element_type(type); -- -- if (type->class == HLSL_CLASS_STRUCT) -- return true; -- -- switch (type->base_type) -+ switch (type->class) - { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_DOUBLE: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_PIXELSHADER: -- case HLSL_TYPE_VERTEXSHADER: -- case HLSL_TYPE_STRING: -+ case HLSL_CLASS_STRUCT: -+ /* Note that the fields must all be numeric; this was validated in -+ * check_invalid_object_fields(). */ - return true; -- case HLSL_TYPE_TEXTURE: -- case HLSL_TYPE_SAMPLER: -- switch (type->sampler_dim) -+ -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ return true; -+ -+ case HLSL_CLASS_ARRAY: -+ return is_type_supported_fx_2(ctx, type->e.array.type, loc); -+ -+ case HLSL_CLASS_OBJECT: -+ switch (type->base_type) - { -- case HLSL_SAMPLER_DIM_1D: -- case HLSL_SAMPLER_DIM_2D: -- case HLSL_SAMPLER_DIM_3D: -- case HLSL_SAMPLER_DIM_CUBE: -- case HLSL_SAMPLER_DIM_GENERIC: -- return true; -+ case HLSL_TYPE_TEXTURE: -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ case HLSL_SAMPLER_DIM_2D: -+ case HLSL_SAMPLER_DIM_3D: -+ case HLSL_SAMPLER_DIM_CUBE: -+ case HLSL_SAMPLER_DIM_GENERIC: -+ return true; -+ default: -+ return false; -+ } -+ break; -+ -+ case HLSL_TYPE_SAMPLER: -+ case HLSL_TYPE_STRING: -+ case HLSL_TYPE_PIXELSHADER: -+ case HLSL_TYPE_VERTEXSHADER: -+ hlsl_fixme(ctx, loc, "Write fx 2.0 parameter object type %#x.", type->base_type); -+ return false; -+ - default: -- ; -+ return false; - } 
-- break; -- default: -- return false; - } - -- return false; -+ vkd3d_unreachable(); - } - - static void write_fx_2_parameters(struct fx_write_context *fx) -@@ -828,7 +832,7 @@ static void write_fx_2_parameters(struct fx_write_context *fx) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (!is_type_supported_fx_2(var->data_type)) -+ if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc)) - continue; - - desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 5638a03a8f5..ed80e2b75c8 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -134,14 +134,39 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) - return hlsl_get_var(scope->upper, name); - } - --void hlsl_free_var(struct hlsl_ir_var *decl) -+static void free_state_block_entry(struct hlsl_state_block_entry *entry) -+{ -+ vkd3d_free(entry->name); -+ vkd3d_free(entry->args); -+ hlsl_block_cleanup(entry->instrs); -+ vkd3d_free(entry->instrs); -+ vkd3d_free(entry); -+} -+ -+void hlsl_free_state_block(struct hlsl_state_block *state_block) - { - unsigned int k; - -+ assert(state_block); -+ for (k = 0; k < state_block->count; ++k) -+ free_state_block_entry(state_block->entries[k]); -+ vkd3d_free(state_block->entries); -+ vkd3d_free(state_block); -+} -+ -+void hlsl_free_var(struct hlsl_ir_var *decl) -+{ -+ unsigned int k, i; -+ - vkd3d_free((void *)decl->name); - hlsl_cleanup_semantic(&decl->semantic); - for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) - vkd3d_free((void *)decl->objects_usage[k]); -+ -+ for (i = 0; i < decl->state_block_count; ++i) -+ hlsl_free_state_block(decl->state_blocks[i]); -+ vkd3d_free(decl->state_blocks); -+ - vkd3d_free(decl); - } - -@@ -1561,6 +1586,27 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned - return 
&swizzle->node; - } - -+struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, -+ struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_stateblock_constant *constant; -+ struct hlsl_type *type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT); -+ -+ if (!(constant = hlsl_alloc(ctx, sizeof(*constant)))) -+ return NULL; -+ -+ init_node(&constant->node, HLSL_IR_STATEBLOCK_CONSTANT, type, loc); -+ -+ if (!(constant->name = hlsl_alloc(ctx, strlen(name) + 1))) -+ { -+ vkd3d_free(constant); -+ return NULL; -+ } -+ strcpy(constant->name, name); -+ -+ return &constant->node; -+} -+ - bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) - { - struct hlsl_type *type = index->val.node->data_type; -@@ -1570,7 +1616,10 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) - - bool hlsl_index_is_resource_access(struct hlsl_ir_index *index) - { -- return index->val.node->data_type->class == HLSL_CLASS_OBJECT; -+ const struct hlsl_type *type = index->val.node->data_type; -+ -+ return type->class == HLSL_CLASS_OBJECT -+ && (type->base_type == HLSL_TYPE_TEXTURE || type->base_type == HLSL_TYPE_UAV); - } - - bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index) -@@ -1881,6 +1930,12 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr - return dst; - } - -+static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, -+ struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant) -+{ -+ return hlsl_new_stateblock_constant(ctx, constant->name, &constant->node.loc); -+} -+ - void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c) - { - hlsl_block_cleanup(&c->body); -@@ -1976,6 +2031,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - - case HLSL_IR_SWIZZLE: - return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); -+ -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); - } - - 
vkd3d_unreachable(); -@@ -2631,7 +2689,6 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - - [HLSL_OP3_CMP] = "cmp", - [HLSL_OP3_DP2ADD] = "dp2add", -- [HLSL_OP3_MOVC] = "movc", - [HLSL_OP3_TERNARY] = "ternary", - }; - -@@ -2808,6 +2865,12 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ - vkd3d_string_buffer_printf(buffer, "]"); - } - -+static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer, -+ const struct hlsl_ir_stateblock_constant *constant) -+{ -+ vkd3d_string_buffer_printf(buffer, "%s", constant->name); -+} -+ - static void dump_ir_switch(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_switch *s) - { - struct hlsl_ir_switch_case *c; -@@ -2896,6 +2959,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - case HLSL_IR_SWIZZLE: - dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); - break; -+ -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); -+ break; - } - } - -@@ -3068,6 +3135,12 @@ static void free_ir_index(struct hlsl_ir_index *index) - vkd3d_free(index); - } - -+static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant) -+{ -+ vkd3d_free(constant->name); -+ vkd3d_free(constant); -+} -+ - void hlsl_free_instr(struct hlsl_ir_node *node) - { - assert(list_empty(&node->uses)); -@@ -3125,6 +3198,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) - case HLSL_IR_SWITCH: - free_ir_switch(hlsl_ir_switch(node)); - break; -+ -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); -+ break; - } - } - -@@ -3290,7 +3367,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) - {"cs_4_0", VKD3D_SHADER_TYPE_COMPUTE, 4, 0, 0, 0, false}, - {"cs_4_1", VKD3D_SHADER_TYPE_COMPUTE, 4, 1, 0, 0, false}, - {"cs_5_0", VKD3D_SHADER_TYPE_COMPUTE, 5, 0, 0, 0, false}, -+ {"cs_5_1", 
VKD3D_SHADER_TYPE_COMPUTE, 5, 1, 0, 0, false}, - {"ds_5_0", VKD3D_SHADER_TYPE_DOMAIN, 5, 0, 0, 0, false}, -+ {"ds_5_1", VKD3D_SHADER_TYPE_DOMAIN, 5, 1, 0, 0, false}, - {"fx_2_0", VKD3D_SHADER_TYPE_EFFECT, 2, 0, 0, 0, false}, - {"fx_4_0", VKD3D_SHADER_TYPE_EFFECT, 4, 0, 0, 0, false}, - {"fx_4_1", VKD3D_SHADER_TYPE_EFFECT, 4, 1, 0, 0, false}, -@@ -3298,7 +3377,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) - {"gs_4_0", VKD3D_SHADER_TYPE_GEOMETRY, 4, 0, 0, 0, false}, - {"gs_4_1", VKD3D_SHADER_TYPE_GEOMETRY, 4, 1, 0, 0, false}, - {"gs_5_0", VKD3D_SHADER_TYPE_GEOMETRY, 5, 0, 0, 0, false}, -+ {"gs_5_1", VKD3D_SHADER_TYPE_GEOMETRY, 5, 1, 0, 0, false}, - {"hs_5_0", VKD3D_SHADER_TYPE_HULL, 5, 0, 0, 0, false}, -+ {"hs_5_1", VKD3D_SHADER_TYPE_HULL, 5, 1, 0, 0, false}, - {"ps.1.0", VKD3D_SHADER_TYPE_PIXEL, 1, 0, 0, 0, false}, - {"ps.1.1", VKD3D_SHADER_TYPE_PIXEL, 1, 1, 0, 0, false}, - {"ps.1.2", VKD3D_SHADER_TYPE_PIXEL, 1, 2, 0, 0, false}, -@@ -3326,6 +3407,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) - {"ps_4_0_level_9_3", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 9, 3, false}, - {"ps_4_1", VKD3D_SHADER_TYPE_PIXEL, 4, 1, 0, 0, false}, - {"ps_5_0", VKD3D_SHADER_TYPE_PIXEL, 5, 0, 0, 0, false}, -+ {"ps_5_1", VKD3D_SHADER_TYPE_PIXEL, 5, 1, 0, 0, false}, - {"tx_1_0", VKD3D_SHADER_TYPE_TEXTURE, 1, 0, 0, 0, false}, - {"vs.1.0", VKD3D_SHADER_TYPE_VERTEX, 1, 0, 0, 0, false}, - {"vs.1.1", VKD3D_SHADER_TYPE_VERTEX, 1, 1, 0, 0, false}, -@@ -3347,6 +3429,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) - {"vs_4_0_level_9_3", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 9, 3, false}, - {"vs_4_1", VKD3D_SHADER_TYPE_VERTEX, 4, 1, 0, 0, false}, - {"vs_5_0", VKD3D_SHADER_TYPE_VERTEX, 5, 0, 0, 0, false}, -+ {"vs_5_1", VKD3D_SHADER_TYPE_VERTEX, 5, 1, 0, 0, false}, - }; - - for (i = 0; i < ARRAY_SIZE(profiles); ++i) -@@ -3651,6 +3734,21 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - - rb_destroy(&ctx->functions, 
free_function_rb, NULL); - -+ /* State blocks must be free before the variables, because they contain instructions that may -+ * refer to them. */ -+ LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ for (i = 0; i < var->state_block_count; ++i) -+ hlsl_free_state_block(var->state_blocks[i]); -+ vkd3d_free(var->state_blocks); -+ var->state_blocks = NULL; -+ var->state_block_count = 0; -+ var->state_block_capacity = 0; -+ } -+ } -+ - LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) - { - LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index aa9cb14fc8d..4225098bc87 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -298,6 +298,7 @@ enum hlsl_ir_node_type - HLSL_IR_STORE, - HLSL_IR_SWIZZLE, - HLSL_IR_SWITCH, -+ HLSL_IR_STATEBLOCK_CONSTANT, - }; - - /* Common data for every type of IR instruction node. */ -@@ -423,6 +424,14 @@ struct hlsl_ir_var - /* Scope that contains annotations for this variable. */ - struct hlsl_scope *annotations; - -+ /* A dynamic array containing the state block on the variable's declaration, if any. -+ * An array variable may contain multiple state blocks. -+ * A technique pass will always contain one. -+ * These are only really used for effect profiles. */ -+ struct hlsl_state_block **state_blocks; -+ unsigned int state_block_count; -+ size_t state_block_capacity; -+ - /* Indexes of the IR instructions where the variable is first written and last read (liveness - * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 - * means function entry. 
*/ -@@ -458,6 +467,38 @@ struct hlsl_ir_var - uint32_t is_separated_resource : 1; - }; - -+/* This struct is used to represent assignments in state block entries: -+ * name = {args[0], args[1], ...}; -+ * - or - -+ * name = args[0] -+ * - or - -+ * name[lhs_index] = args[0] -+ * - or - -+ * name[lhs_index] = {args[0], args[1], ...}; -+ */ -+struct hlsl_state_block_entry -+{ -+ /* For assignments, the name in the lhs. */ -+ char *name; -+ -+ /* Whether the lhs in the assignment is indexed and, in that case, its index. */ -+ bool lhs_has_index; -+ unsigned int lhs_index; -+ -+ /* Instructions present in the rhs. */ -+ struct hlsl_block *instrs; -+ -+ /* For assignments, arguments of the rhs initializer. */ -+ struct hlsl_ir_node **args; -+ unsigned int args_count; -+}; -+ -+struct hlsl_state_block -+{ -+ struct hlsl_state_block_entry **entries; -+ size_t count, capacity; -+}; -+ - /* Sized array of variables representing a function's parameters. */ - struct hlsl_func_parameters - { -@@ -601,14 +642,9 @@ enum hlsl_ir_expr_op - /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, - * then adds c. */ - HLSL_OP3_DP2ADD, -- /* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise. -- * TERNARY(a, b, c) returns c if a == 0 and b otherwise. -- * They differ for floating point numbers, because -- * -0.0 == 0.0, but it is not bitwise zero. CMP(a, b, c) returns b -- if a >= 0, and c otherwise. It's used only for SM1-SM3 targets, while -- SM4+ is using MOVC in such cases. */ -+ /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean. -+ * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. 
*/ - HLSL_OP3_CMP, -- HLSL_OP3_MOVC, - HLSL_OP3_TERNARY, - }; - -@@ -754,6 +790,14 @@ struct hlsl_ir_constant - struct hlsl_reg reg; - }; - -+/* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, -+ * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ -+struct hlsl_ir_stateblock_constant -+{ -+ struct hlsl_ir_node node; -+ char *name; -+}; -+ - struct hlsl_scope - { - /* Item entry for hlsl_ctx.scopes. */ -@@ -932,6 +976,16 @@ struct hlsl_ctx - bool warn_implicit_truncation; - }; - -+static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); -+} -+ -+static inline bool hlsl_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return !hlsl_version_ge(ctx, major, minor); -+} -+ - struct hlsl_resource_load_params - { - struct hlsl_type *format; -@@ -1019,6 +1073,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n - return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); - } - -+static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) -+{ -+ assert(node->type == HLSL_IR_STATEBLOCK_CONSTANT); -+ return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); -+} -+ - static inline void hlsl_block_init(struct hlsl_block *block) - { - list_init(&block->instrs); -@@ -1211,6 +1271,7 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new); - void hlsl_free_attribute(struct hlsl_attribute *attr); - void hlsl_free_instr(struct hlsl_ir_node *node); - void hlsl_free_instr_list(struct list *list); -+void hlsl_free_state_block(struct hlsl_state_block *state_block); - void hlsl_free_type(struct hlsl_type *type); - void hlsl_free_var(struct hlsl_ir_var *decl); - -@@ 
-1292,6 +1353,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, - struct hlsl_struct_field *fields, size_t field_count); - struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, - struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, -+ struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, - struct hlsl_type *type, const struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index e02e0c540f9..8f71556757a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -77,6 +77,10 @@ struct parse_variable_def - struct hlsl_type *basic_type; - uint32_t modifiers; - struct vkd3d_shader_location modifiers_loc; -+ -+ struct hlsl_state_block **state_blocks; -+ unsigned int state_block_count; -+ size_t state_block_capacity; - }; - - struct parse_function -@@ -114,6 +118,12 @@ struct parse_attribute_list - const struct hlsl_attribute **attrs; - }; - -+struct state_block_index -+{ -+ bool has_index; -+ unsigned int index; -+}; -+ - } - - %code provides -@@ -931,24 +941,10 @@ static void free_parse_variable_def(struct parse_variable_def *v) - vkd3d_free(v->arrays.sizes); - vkd3d_free(v->name); - hlsl_cleanup_semantic(&v->semantic); -+ assert(!v->state_blocks); - vkd3d_free(v); - } - --static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) --{ -- return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; --} -- --static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) --{ -- return ctx->profile->major_version > major || (ctx->profile->major_version == major && 
ctx->profile->minor_version >= minor); --} -- --static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) --{ -- return !shader_profile_version_ge(ctx, major, minor); --} -- - static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - struct hlsl_type *type, uint32_t modifiers, struct list *defs) - { -@@ -971,7 +967,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - - field->type = type; - -- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) -+ if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) - { - for (k = 0; k < v->arrays.count; ++k) - unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -@@ -1121,7 +1117,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters - } - - static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *annotations, -- const struct vkd3d_shader_location *loc) -+ struct hlsl_state_block *state_block, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_var *var; - struct hlsl_type *type; -@@ -1131,6 +1127,11 @@ static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope * - return false; - var->annotations = annotations; - -+ var->state_blocks = hlsl_alloc(ctx, sizeof(*var->state_blocks)); -+ var->state_blocks[0] = state_block; -+ var->state_block_count = 1; -+ var->state_block_capacity = 1; -+ - if (!hlsl_add_var(ctx, var, false)) - { - struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); -@@ -1216,7 +1217,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const - struct hlsl_reg_reservation reservation = {0}; - char *endptr; - -- if (shader_profile_version_lt(ctx, 4, 0)) -+ if (hlsl_version_lt(ctx, 4, 0)) - return reservation; - - reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); -@@ -1299,6 +1300,7 @@ static unsigned int 
evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_STORE: - case HLSL_IR_SWITCH: -+ case HLSL_IR_STATEBLOCK_CONSTANT: - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Expected literal expression."); - } -@@ -2177,7 +2179,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - - type = basic_type; - -- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) -+ if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) - { - for (i = 0; i < v->arrays.count; ++i) - unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -@@ -2362,8 +2364,25 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - free_parse_variable_def(v); - continue; - } -+ - type = var->data_type; - -+ var->state_blocks = v->state_blocks; -+ var->state_block_count = v->state_block_count; -+ var->state_block_capacity = v->state_block_capacity; -+ v->state_block_count = 0; -+ v->state_block_capacity = 0; -+ v->state_blocks = NULL; -+ -+ if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected %u state blocks, but got %u.", -+ hlsl_type_component_count(type), var->state_block_count); -+ free_parse_variable_def(v); -+ continue; -+ } -+ - if (v->initializer.args_count) - { - if (v->initializer.braces) -@@ -2663,12 +2682,14 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, - static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -+ enum hlsl_base_type base_type; - struct hlsl_type *type; - - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) - return false; - -- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ base_type = 
type->base_type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; -+ type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); - - return convert_args(ctx, params, type, loc); - } -@@ -2728,81 +2749,62 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, - return write_acos_or_asin(ctx, params, loc, false); - } - --static bool intrinsic_all(struct hlsl_ctx *ctx, -- const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+/* Find the type corresponding to the given source type, with the same -+ * dimensions but a different base type. */ -+static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, -+ const struct hlsl_type *type, enum hlsl_base_type base_type) - { -- struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; -+ return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); -+} -+ -+static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params, -+ struct hlsl_ir_node *arg, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *res, *load; - unsigned int i, count; - -- if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, one); -+ count = hlsl_type_component_count(arg->data_type); - -- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) -+ if (!(res = hlsl_add_load_component(ctx, params->instrs, arg, 0, loc))) - return false; -- hlsl_block_add_instr(params->instrs, zero); - -- mul = one; -- -- count = hlsl_type_component_count(arg->data_type); -- for (i = 0; i < count; ++i) -+ for (i = 1; i < count; ++i) - { - if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) - return false; - -- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) -- return false; -+ if (!(res = hlsl_new_binary_expr(ctx, op, res, load))) -+ return NULL; -+ hlsl_block_add_instr(params->instrs, 
res); - } - -- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); -+ return true; - } - --static bool intrinsic_any(struct hlsl_ctx *ctx, -+static bool intrinsic_all(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; -- unsigned int i, count; -+ struct hlsl_ir_node *arg = params->args[0], *cast; -+ struct hlsl_type *bool_type; - -- if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) -- { -- hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); -+ bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); -+ if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) - return false; -- } -- -- if (arg->data_type->base_type == HLSL_TYPE_FLOAT) -- { -- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, zero); -- -- if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) -- return false; - -- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); -- } -- else if (arg->data_type->base_type == HLSL_TYPE_BOOL) -- { -- if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, bfalse); -- -- or = bfalse; -- -- count = hlsl_type_component_count(arg->data_type); -- for (i = 0; i < count; ++i) -- { -- if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) -- return false; -+ return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_AND, loc); -+} - -- if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) -- return false; -- } -+static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params, -+ const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg = 
params->args[0], *cast; -+ struct hlsl_type *bool_type; - -- return true; -- } -+ bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); -+ if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) -+ return false; - -- hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); -- return false; -+ return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_OR, loc); - } - - static bool intrinsic_asin(struct hlsl_ctx *ctx, -@@ -2870,20 +2872,20 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, - type->name, type->name, type->name); - if (ret < 0) - { -- vkd3d_string_buffer_cleanup(buf); -+ hlsl_release_string_buffer(ctx, buf); - return false; - } - - ret = vkd3d_string_buffer_printf(buf, body_template, type->name); - if (ret < 0) - { -- vkd3d_string_buffer_cleanup(buf); -+ hlsl_release_string_buffer(ctx, buf); - return false; - } - - func = hlsl_compile_internal_function(ctx, - atan2_mode ? atan2_name : atan_name, buf->buffer); -- vkd3d_string_buffer_cleanup(buf); -+ hlsl_release_string_buffer(ctx, buf); - if (!func) - return false; - -@@ -2903,15 +2905,6 @@ static bool intrinsic_atan2(struct hlsl_ctx *ctx, - return write_atan_or_atan2(ctx, params, loc, true); - } - -- --/* Find the type corresponding to the given source type, with the same -- * dimensions but a different base type. 
*/ --static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, -- const struct hlsl_type *type, enum hlsl_base_type base_type) --{ -- return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); --} -- - static bool intrinsic_asfloat(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3035,6 +3028,46 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_COS, arg, loc); - } - -+static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool sinh_mode) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_ir_node *arg; -+ const char *fn_name, *type_name; -+ char *body; -+ -+ static const char template[] = -+ "%s %s(%s x)\n" -+ "{\n" -+ " return (exp(x) %s exp(-x)) / 2;\n" -+ "}\n"; -+ static const char fn_name_sinh[] = "sinh"; -+ static const char fn_name_cosh[] = "cosh"; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ type_name = arg->data_type->name; -+ fn_name = sinh_mode ? fn_name_sinh : fn_name_cosh; -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, -+ type_name, fn_name, type_name, sinh_mode ? 
"-" : "+"))) -+ return false; -+ -+ func = hlsl_compile_internal_function(ctx, fn_name, body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return add_user_call(ctx, func, params, loc); -+} -+ -+static bool intrinsic_cosh(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return write_cosh_or_sinh(ctx, params, loc, false); -+} -+ - static bool intrinsic_cross(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3747,6 +3780,59 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, i, neg, loc); - } - -+static bool intrinsic_refract(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_type *r_type = params->args[0]->data_type; -+ struct hlsl_type *n_type = params->args[1]->data_type; -+ struct hlsl_type *i_type = params->args[2]->data_type; -+ struct hlsl_type *res_type, *idx_type, *scal_type; -+ struct parse_initializer mut_params; -+ struct hlsl_ir_function_decl *func; -+ enum hlsl_base_type base; -+ char *body; -+ -+ static const char template[] = -+ "%s refract(%s r, %s n, %s i)\n" -+ "{\n" -+ " %s d, t;\n" -+ " d = dot(r, n);\n" -+ " t = 1 - i.x * i.x * (1 - d * d);\n" -+ " return t >= 0.0 ? 
i.x * r - (i.x * d + sqrt(t)) * n : 0;\n" -+ "}"; -+ -+ if (r_type->class == HLSL_CLASS_MATRIX -+ || n_type->class == HLSL_CLASS_MATRIX -+ || i_type->class == HLSL_CLASS_MATRIX) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Matrix arguments are not supported."); -+ return false; -+ } -+ -+ assert(params->args_count == 3); -+ mut_params = *params; -+ mut_params.args_count = 2; -+ if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) -+ return false; -+ -+ base = expr_common_base_type(res_type->base_type, i_type->base_type); -+ base = base == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; -+ res_type = convert_numeric_type(ctx, res_type, base); -+ idx_type = convert_numeric_type(ctx, i_type, base); -+ scal_type = hlsl_get_scalar_type(ctx, base); -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, res_type->name, res_type->name, -+ res_type->name, idx_type->name, scal_type->name))) -+ return false; -+ -+ func = hlsl_compile_internal_function(ctx, "refract", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return add_user_call(ctx, func, params, loc); -+} -+ - static bool intrinsic_round(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3827,6 +3913,12 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); - } - -+static bool intrinsic_sinh(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return write_cosh_or_sinh(ctx, params, loc, true); -+} -+ - /* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ - static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -@@ -3899,6 +3991,39 @@ static bool intrinsic_tan(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, 
HLSL_OP2_DIV, sin, cos, loc); - } - -+static bool intrinsic_tanh(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_ir_node *arg; -+ struct hlsl_type *type; -+ char *body; -+ -+ static const char template[] = -+ "%s tanh(%s x)\n" -+ "{\n" -+ " %s exp_pos, exp_neg;\n" -+ " exp_pos = exp(x);\n" -+ " exp_neg = exp(-x);\n" -+ " return (exp_pos - exp_neg) / (exp_pos + exp_neg);\n" -+ "}\n"; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ type = arg->data_type; -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, -+ type->name, type->name, type->name))) -+ return false; -+ -+ func = hlsl_compile_internal_function(ctx, "tanh", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return add_user_call(ctx, func, params, loc); -+} -+ - static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, - const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) - { -@@ -3967,7 +4092,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - return false; - } - -- if (shader_profile_version_ge(ctx, 4, 0)) -+ if (hlsl_version_ge(ctx, 4, 0)) - { - unsigned int count = hlsl_sampler_dim_count(dim); - struct hlsl_ir_node *divisor; -@@ -4014,7 +4139,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - return false; - - initialize_var_components(ctx, params->instrs, var, &idx, coords); -- if (shader_profile_version_ge(ctx, 4, 0)) -+ if (hlsl_version_ge(ctx, 4, 0)) - { - if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) - return false; -@@ -4200,7 +4325,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) - return false; - -- if (shader_profile_version_ge(ctx, 4, 0)) -+ if 
(hlsl_version_ge(ctx, 4, 0)) - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); - - return true; -@@ -4231,6 +4356,7 @@ intrinsic_functions[] = - {"clamp", 3, true, intrinsic_clamp}, - {"clip", 1, true, intrinsic_clip}, - {"cos", 1, true, intrinsic_cos}, -+ {"cosh", 1, true, intrinsic_cosh}, - {"cross", 2, true, intrinsic_cross}, - {"ddx", 1, true, intrinsic_ddx}, - {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, -@@ -4262,15 +4388,18 @@ intrinsic_functions[] = - {"pow", 2, true, intrinsic_pow}, - {"radians", 1, true, intrinsic_radians}, - {"reflect", 2, true, intrinsic_reflect}, -+ {"refract", 3, true, intrinsic_refract}, - {"round", 1, true, intrinsic_round}, - {"rsqrt", 1, true, intrinsic_rsqrt}, - {"saturate", 1, true, intrinsic_saturate}, - {"sign", 1, true, intrinsic_sign}, - {"sin", 1, true, intrinsic_sin}, -+ {"sinh", 1, true, intrinsic_sinh}, - {"smoothstep", 3, true, intrinsic_smoothstep}, - {"sqrt", 1, true, intrinsic_sqrt}, - {"step", 2, true, intrinsic_step}, - {"tan", 1, true, intrinsic_tan}, -+ {"tanh", 1, true, intrinsic_tanh}, - {"tex1D", -1, false, intrinsic_tex1D}, - {"tex2D", -1, false, intrinsic_tex2D}, - {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, -@@ -4405,26 +4534,34 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) - return false; - } -- else if (common_type->dimx == 1 && common_type->dimy == 1) -- { -- common_type = hlsl_get_numeric_type(ctx, cond_type->class, -- common_type->base_type, cond_type->dimx, cond_type->dimy); -- } -- else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) -+ else - { -- /* This condition looks wrong but is correct. -- * floatN is compatible with float1xN, but not with floatNx1. 
*/ -- -- struct vkd3d_string_buffer *cond_string, *value_string; -+ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, -+ cond_type->dimx, cond_type->dimy); -+ if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) -+ return false; - -- cond_string = hlsl_type_to_string(ctx, cond_type); -- value_string = hlsl_type_to_string(ctx, common_type); -- if (cond_string && value_string) -- hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Ternary condition type '%s' is not compatible with value type '%s'.", -- cond_string->buffer, value_string->buffer); -- hlsl_release_string_buffer(ctx, cond_string); -- hlsl_release_string_buffer(ctx, value_string); -+ if (common_type->dimx == 1 && common_type->dimy == 1) -+ { -+ common_type = hlsl_get_numeric_type(ctx, cond_type->class, -+ common_type->base_type, cond_type->dimx, cond_type->dimy); -+ } -+ else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) -+ { -+ /* This condition looks wrong but is correct. -+ * floatN is compatible with float1xN, but not with floatNx1. 
*/ -+ -+ struct vkd3d_string_buffer *cond_string, *value_string; -+ -+ cond_string = hlsl_type_to_string(ctx, cond_type); -+ value_string = hlsl_type_to_string(ctx, common_type); -+ if (cond_string && value_string) -+ hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Ternary condition type '%s' is not compatible with value type '%s'.", -+ cond_string->buffer, value_string->buffer); -+ hlsl_release_string_buffer(ctx, cond_string); -+ hlsl_release_string_buffer(ctx, value_string); -+ } - } - - if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) -@@ -4449,9 +4586,16 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - hlsl_release_string_buffer(ctx, second_string); - } - -+ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, -+ cond_type->dimx, cond_type->dimy); -+ if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) -+ return false; -+ - common_type = first->data_type; - } - -+ assert(cond->data_type->base_type == HLSL_TYPE_BOOL); -+ - args[0] = cond; - args[1] = first; - args[2] = second; -@@ -5280,6 +5424,16 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - hlsl_release_string_buffer(ctx, string); - } - -+static bool state_block_add_entry(struct hlsl_state_block *state_block, struct hlsl_state_block_entry *entry) -+{ -+ if (!vkd3d_array_reserve((void **)&state_block->entries, &state_block->capacity, state_block->count + 1, -+ sizeof(*state_block->entries))) -+ return false; -+ -+ state_block->entries[state_block->count++] = entry; -+ return true; -+} -+ - } - - %locations -@@ -5320,6 +5474,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - struct parse_attribute_list attr_list; - struct hlsl_ir_switch_case *switch_case; - struct hlsl_scope *scope; -+ struct hlsl_state_block *state_block; -+ struct state_block_index state_block_index; - } - - %token KW_BLENDSTATE -@@ -5519,6 
+5675,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - - %type any_identifier - %type var_identifier -+%type stateblock_lhs_identifier - %type name_opt - - %type parameter -@@ -5533,6 +5690,10 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - - %type semantic - -+%type state_block -+ -+%type state_block_index_opt -+ - %type switch_case - - %type field_type -@@ -5543,6 +5704,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %type type_no_void - %type typedef_type - -+%type state_block_list - %type type_spec - %type variable_decl - %type variable_def -@@ -5573,9 +5735,9 @@ name_opt: - | any_identifier - - pass: -- KW_PASS name_opt annotations_opt '{' '}' -+ KW_PASS name_opt annotations_opt '{' state_block_start state_block '}' - { -- if (!add_pass(ctx, $2, $3, &@1)) -+ if (!add_pass(ctx, $2, $3, $6, &@1)) - YYABORT; - } - -@@ -6474,7 +6636,7 @@ type_no_void: - { - validate_texture_format_type(ctx, $3, &@3); - -- if (shader_profile_version_lt(ctx, 4, 1)) -+ if (hlsl_version_lt(ctx, 4, 1)) - { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); -@@ -6513,7 +6675,7 @@ type_no_void: - $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); - if ($$->is_minimum_precision) - { -- if (shader_profile_version_lt(ctx, 4, 0)) -+ if (hlsl_version_lt(ctx, 4, 0)) - { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Target profile doesn't support minimum-precision types."); -@@ -6677,22 +6839,91 @@ variable_decl: - $$->reg_reservation = $3.reg_reservation; - } - --state: -- any_identifier '=' expr ';' -+state_block_start: -+ %empty - { -- vkd3d_free($1); -- destroy_block($3); -+ ctx->in_state_block = 1; - } - --state_block_start: -+stateblock_lhs_identifier: -+ any_identifier -+ { -+ $$ = $1; -+ } -+ | KW_PIXELSHADER -+ { -+ if (!($$ = hlsl_strdup(ctx, 
"pixelshader"))) -+ YYABORT; -+ } -+ | KW_VERTEXSHADER -+ { -+ if (!($$ = hlsl_strdup(ctx, "vertexshader"))) -+ YYABORT; -+ } -+ -+state_block_index_opt: - %empty - { -- ctx->in_state_block = 1; -+ $$.has_index = false; -+ $$.index = 0; - } -+ | '[' C_INTEGER ']' -+ { -+ if ($2 < 0) -+ { -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, -+ "State block array index is not a positive integer constant."); -+ YYABORT; -+ } -+ $$.has_index = true; -+ $$.index = $2; -+ } - - state_block: - %empty -- | state_block state -+ { -+ if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) -+ YYABORT; -+ } -+ | state_block stateblock_lhs_identifier state_block_index_opt '=' complex_initializer ';' -+ { -+ struct hlsl_state_block_entry *entry; -+ -+ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) -+ YYABORT; -+ -+ entry->name = $2; -+ entry->lhs_has_index = $3.has_index; -+ entry->lhs_index = $3.index; -+ -+ entry->instrs = $5.instrs; -+ entry->args = $5.args; -+ entry->args_count = $5.args_count; -+ -+ $$ = $1; -+ state_block_add_entry($$, entry); -+ } -+ -+state_block_list: -+ '{' state_block '}' -+ { -+ if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) -+ YYABORT; -+ -+ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, -+ $$->state_block_count + 1, sizeof(*$$->state_blocks)))) -+ YYABORT; -+ $$->state_blocks[$$->state_block_count++] = $2; -+ } -+ | state_block_list ',' '{' state_block '}' -+ { -+ $$ = $1; -+ -+ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, -+ $$->state_block_count + 1, sizeof(*$$->state_blocks)))) -+ YYABORT; -+ $$->state_blocks[$$->state_block_count++] = $4; -+ } - - variable_def: - variable_decl -@@ -6705,6 +6936,24 @@ variable_def: - { - $$ = $1; - ctx->in_state_block = 0; -+ -+ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, -+ $$->state_block_count + 1, sizeof(*$$->state_blocks)))) -+ YYABORT; -+ $$->state_blocks[$$->state_block_count++] = $4; -+ } -+ | variable_decl 
'{' state_block_start state_block_list '}' -+ { -+ $$ = $1; -+ ctx->in_state_block = 0; -+ -+ $$->state_blocks = $4->state_blocks; -+ $$->state_block_count = $4->state_block_count; -+ $$->state_block_capacity = $4->state_block_capacity; -+ $4->state_blocks = NULL; -+ $4->state_block_count = 0; -+ $4->state_block_capacity = 0; -+ free_parse_variable_def($4); - } - - variable_def_typed: -@@ -7330,15 +7579,13 @@ primary_expr: - { - if (ctx->in_state_block) - { -- struct hlsl_ir_load *load; -- struct hlsl_ir_var *var; -+ struct hlsl_ir_node *constant; - -- if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", -- hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) -+ if (!(constant = hlsl_new_stateblock_constant(ctx, $1, &@1))) - YYABORT; -- if (!(load = hlsl_new_var_load(ctx, var, &@1))) -- YYABORT; -- if (!($$ = make_block(ctx, &load->node))) -+ vkd3d_free($1); -+ -+ if (!($$ = make_block(ctx, constant))) - YYABORT; - } - else -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 6f2de93767b..eaa72836d8a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -1565,7 +1565,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, - var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), - new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); - -- if (instr->data_type->class != HLSL_CLASS_OBJECT) -+ if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR) - { - struct hlsl_ir_node *swizzle_node; - -@@ -1742,7 +1742,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s - { - unsigned int writemask = store->writemask; - -- if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) -+ if (!hlsl_is_numeric_type(store->rhs.node->data_type)) - writemask = VKD3DSP_WRITEMASK_0; - copy_propagation_set_value(ctx, 
var_def, start, writemask, store->rhs.node, store->node.index); - } -@@ -2954,12 +2954,11 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st - return true; - } - --/* Use movc/cmp for the ternary operator. */ -+/* Lower TERNARY to CMP for SM1. */ - static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement; -- struct hlsl_ir_node *zero, *cond, *first, *second; -- struct hlsl_constant_value zero_value = { 0 }; -+ struct hlsl_ir_node *cond, *first, *second, *float_cond, *neg; - struct hlsl_ir_expr *expr; - struct hlsl_type *type; - -@@ -2980,48 +2979,25 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - return false; - } - -- if (ctx->profile->major_version < 4) -- { -- struct hlsl_ir_node *abs, *neg; -+ assert(cond->data_type->base_type == HLSL_TYPE_BOOL); - -- if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, abs); -+ type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, -+ instr->data_type->dimx, instr->data_type->dimy); - -- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, neg); -- -- operands[0] = neg; -- operands[1] = second; -- operands[2] = first; -- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) -- return false; -- } -- else -- { -- if (cond->data_type->base_type == HLSL_TYPE_FLOAT) -- { -- if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, zero); -+ if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, float_cond); - -- operands[0] = zero; -- operands[1] = cond; -- type = cond->data_type; -- type = hlsl_get_numeric_type(ctx, 
type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); -- if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, cond); -- } -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, float_cond, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg); - -- memset(operands, 0, sizeof(operands)); -- operands[0] = cond; -- operands[1] = first; -- operands[2] = second; -- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc))) -- return false; -- } -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = neg; -+ operands[1] = second; -+ operands[2] = first; -+ if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) -+ return false; - - hlsl_block_add_instr(block, replacement); - return true; -@@ -3319,11 +3295,21 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) - { -+ struct hlsl_type *cond_type = condition->data_type; - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; - struct hlsl_ir_node *cond; - - assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); - -+ if (cond_type->base_type != HLSL_TYPE_BOOL) -+ { -+ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy); -+ -+ if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc))) -+ return NULL; -+ hlsl_block_add_instr(instrs, condition); -+ } -+ - operands[0] = condition; - operands[1] = if_true; - operands[2] = if_false; -@@ -3760,6 +3746,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_SWITCH: - break; -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ /* Stateblock constants 
should not appear in the shader program. */ -+ vkd3d_unreachable(); - } - - return false; -@@ -3848,6 +3837,22 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - } - } - -+static void deref_mark_last_read(struct hlsl_deref *deref, unsigned int last_read) -+{ -+ unsigned int i; -+ -+ if (hlsl_deref_is_lowered(deref)) -+ { -+ if (deref->rel_offset.node) -+ deref->rel_offset.node->last_read = last_read; -+ } -+ else -+ { -+ for (i = 0; i < deref->path_len; ++i) -+ deref->path[i].node->last_read = last_read; -+ } -+} -+ - /* Compute the earliest and latest liveness for each variable. In the case that - * a variable is accessed inside of a loop, we promote its liveness to extend - * to at least the range of the entire loop. We also do this for nodes, so that -@@ -3867,6 +3872,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - case HLSL_IR_CALL: - /* We should have inlined all calls before computing liveness. */ - vkd3d_unreachable(); -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ /* Stateblock constants should not appear in the shader program. */ -+ vkd3d_unreachable(); - - case HLSL_IR_STORE: - { -@@ -3876,8 +3884,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - if (!var->first_write) - var->first_write = loop_first ? 
min(instr->index, loop_first) : instr->index; - store->rhs.node->last_read = last_read; -- if (store->lhs.rel_offset.node) -- store->lhs.rel_offset.node->last_read = last_read; -+ deref_mark_last_read(&store->lhs, last_read); - break; - } - case HLSL_IR_EXPR: -@@ -3904,8 +3911,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - - var = load->src.var; - var->last_read = max(var->last_read, last_read); -- if (load->src.rel_offset.node) -- load->src.rel_offset.node->last_read = last_read; -+ deref_mark_last_read(&load->src, last_read); - break; - } - case HLSL_IR_LOOP: -@@ -3922,14 +3928,12 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - - var = load->resource.var; - var->last_read = max(var->last_read, last_read); -- if (load->resource.rel_offset.node) -- load->resource.rel_offset.node->last_read = last_read; -+ deref_mark_last_read(&load->resource, last_read); - - if ((var = load->sampler.var)) - { - var->last_read = max(var->last_read, last_read); -- if (load->sampler.rel_offset.node) -- load->sampler.rel_offset.node->last_read = last_read; -+ deref_mark_last_read(&load->sampler, last_read); - } - - if (load->coords.node) -@@ -3954,8 +3958,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - - var = store->resource.var; - var->last_read = max(var->last_read, last_read); -- if (store->resource.rel_offset.node) -- store->resource.rel_offset.node->last_read = last_read; -+ deref_mark_last_read(&store->resource, last_read); - store->coords.node->last_read = last_read; - store->value.node->last_read = last_read; - break; -@@ -4790,7 +4793,9 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) - continue; - - if (var1->reg_reservation.offset_type -- || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type)) -+ || var1->reg_reservation.reg_type == 's' -+ || var1->reg_reservation.reg_type == 't' -+ || var1->reg_reservation.reg_type 
== 'u') - buffer->manually_packed_elements = true; - else - buffer->automatically_packed_elements = true; -@@ -5400,11 +5405,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); - - lower_ir(ctx, lower_narrowing_casts, body); -- lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_int_dot, body); - lower_ir(ctx, lower_int_division, body); - lower_ir(ctx, lower_int_modulus, body); - lower_ir(ctx, lower_int_abs, body); -+ lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_float_modulus, body); - hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - do -@@ -5420,13 +5425,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - remove_unreachable_code(ctx, body); - hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL); - -- if (profile-> major_version < 4) -- { -- lower_ir(ctx, lower_nonfloat_exprs, body); -- /* Constants casted to float must be folded. */ -- hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -- } -- - lower_ir(ctx, lower_nonconstant_vector_derefs, body); - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_int_dot, body); -@@ -5438,9 +5436,15 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); - sort_synthetic_separated_samplers_first(ctx); - -- lower_ir(ctx, lower_ternary, body); - if (profile->major_version < 4) - { -+ lower_ir(ctx, lower_ternary, body); -+ -+ lower_ir(ctx, lower_nonfloat_exprs, body); -+ /* Constants casted to float must be folded, and new casts to bool also need to be lowered. 
*/ -+ hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -+ lower_ir(ctx, lower_casts_to_bool, body); -+ - lower_ir(ctx, lower_casts_to_int, body); - lower_ir(ctx, lower_division, body); - lower_ir(ctx, lower_sqrt, body); -@@ -5463,6 +5467,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - - lower_ir(ctx, validate_nonconstant_vector_store_derefs, body); - -+ do -+ compute_liveness(ctx, entry_func); -+ while (hlsl_transform_ir(ctx, dce, body, NULL)); -+ - /* TODO: move forward, remove when no longer needed */ - transform_derefs(ctx, replace_deref_path_with_offset, body); - while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index b76b1fce507..4cea98e9286 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -1177,30 +1177,11 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - assert(dst_type->base_type == src2->node.data_type->base_type); - assert(dst_type->base_type == src3->node.data_type->base_type); -+ assert(src1->node.data_type->base_type == HLSL_TYPE_BOOL); - - for (k = 0; k < dst_type->dimx; ++k) -- { -- switch (src1->node.data_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- dst->u[k] = src1->value.u[k].f != 0.0f ? src2->value.u[k] : src3->value.u[k]; -- break; -- -- case HLSL_TYPE_DOUBLE: -- dst->u[k] = src1->value.u[k].d != 0.0 ? src2->value.u[k] : src3->value.u[k]; -- break; -+ dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; - -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- case HLSL_TYPE_BOOL: -- dst->u[k] = src1->value.u[k].u ? 
src2->value.u[k] : src3->value.u[k]; -- break; -- -- default: -- vkd3d_unreachable(); -- } -- } - return true; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 8af537390f9..610d907d981 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -17,6 +17,7 @@ - */ - - #include "vkd3d_shader_private.h" -+#include "vkd3d_types.h" - - bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) - { -@@ -56,19 +57,6 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i - vsir_instruction_init(ins, &location, VKD3DSIH_NOP); - } - --static void remove_dcl_temps(struct vsir_program *program) --{ -- unsigned int i; -- -- for (i = 0; i < program->instructions.count; ++i) -- { -- struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -- -- if (ins->handler_idx == VKD3DSIH_DCL_TEMPS) -- vkd3d_shader_instruction_make_nop(ins); -- } --} -- - static bool vsir_instruction_init_with_params(struct vsir_program *program, - struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) -@@ -94,85 +82,163 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, - return true; - } - --static enum vkd3d_result vsir_program_lower_texkills(struct vsir_program *program) -+static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program, -+ struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx) - { -+ const unsigned int components_read = 3 + (program->shader_version.major >= 2); - struct vkd3d_shader_instruction_array *instructions = &program->instructions; -- struct vkd3d_shader_instruction *texkill_ins, *ins; -- unsigned int components_read = 3 + (program->shader_version.major >= 2); -- unsigned int tmp_idx = ~0u; -- unsigned int i, k; -- -- for (i 
= 0; i < instructions->count; ++i) -- { -- texkill_ins = &instructions->elements[i]; -+ size_t pos = texkill - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int j; - -- if (texkill_ins->handler_idx != VKD3DSIH_TEXKILL) -- continue; -+ if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- if (!shader_instruction_array_insert_at(instructions, i + 1, components_read + 1)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ if (*tmp_idx == ~0u) -+ *tmp_idx = program->temp_count++; - -- if (tmp_idx == ~0u) -- tmp_idx = program->temp_count++; -+ /* tmp = ins->dst[0] < 0 */ - -- /* tmp = ins->dst[0] < 0 */ -+ ins = &instructions->elements[pos + 1]; -+ if (!vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_LTO, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- ins = &instructions->elements[i + 1]; -- if (!vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_LTO, 1, 2)) -+ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].reg.idx[0].offset = *tmp_idx; -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; -+ -+ ins->src[0].reg = texkill->dst[0].reg; -+ ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); -+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[1].reg.u.immconst_f32[0] = 0.0f; -+ ins->src[1].reg.u.immconst_f32[1] = 0.0f; -+ ins->src[1].reg.u.immconst_f32[2] = 0.0f; -+ ins->src[1].reg.u.immconst_f32[3] = 0.0f; -+ -+ /* tmp.x = tmp.x || tmp.y */ -+ /* tmp.x = tmp.x || tmp.z */ -+ /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ -+ -+ for (j = 1; j < components_read; ++j) -+ { -+ ins = &instructions->elements[pos + 1 + j]; -+ if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_OR, 1, 2))) - return VKD3D_ERROR_OUT_OF_MEMORY; - - 
vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->dst[0].reg.idx[0].offset = tmp_idx; -- ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; -+ ins->dst[0].reg.idx[0].offset = *tmp_idx; -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; - -- ins->src[0].reg = texkill_ins->dst[0].reg; -- vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); -+ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].reg.idx[0].offset = *tmp_idx; -+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->src[1].reg.u.immconst_f32[0] = 0.0f; -- ins->src[1].reg.u.immconst_f32[1] = 0.0f; -- ins->src[1].reg.u.immconst_f32[2] = 0.0f; -- ins->src[1].reg.u.immconst_f32[3] = 0.0f; -+ ins->src[1].reg.idx[0].offset = *tmp_idx; -+ ins->src[1].swizzle = vkd3d_shader_create_swizzle(j, j, j, j); -+ } - -- /* tmp.x = tmp.x || tmp.y */ -- /* tmp.x = tmp.x || tmp.z */ -- /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ -+ /* discard_nz tmp.x */ - -- for (k = 1; k < components_read; ++k) -- { -- ins = &instructions->elements[i + 1 + k]; -- if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_OR, 1, 2))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins = &instructions->elements[pos + 1 + components_read]; -+ if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_DISCARD, 0, 1))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; - -- vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -- ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->dst[0].reg.idx[0].offset = tmp_idx; -- ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; -- -- vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, 
VKD3D_DATA_UINT, 1); -- ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->src[0].reg.idx[0].offset = tmp_idx; -- ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -- vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -- ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->src[1].reg.idx[0].offset = tmp_idx; -- ins->src[1].swizzle = vkd3d_shader_create_swizzle(k, k, k, k); -- } -+ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].reg.idx[0].offset = *tmp_idx; -+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - -- /* discard_nz tmp.x */ -+ /* Make the original instruction no-op */ -+ vkd3d_shader_instruction_make_nop(texkill); - -- ins = &instructions->elements[i + 1 + components_read]; -- if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_DISCARD, 0, 1))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -- ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; -+ return VKD3D_OK; -+} - -- vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -- ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->src[0].reg.idx[0].offset = tmp_idx; -- ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+/* The Shader Model 5 Assembly documentation states: "If components of a mad -+ * instruction are tagged as precise, the hardware must execute a mad instruction -+ * or the exact equivalent, and it cannot split it into a multiply followed by an add." -+ * But DXIL.rst states the opposite: "Floating point multiply & add. This operation is -+ * not fused for "precise" operations." -+ * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. 
*/ -+static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *program, -+ struct vkd3d_shader_instruction *mad, unsigned int *tmp_idx) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ struct vkd3d_shader_instruction *mul_ins, *add_ins; -+ size_t pos = mad - instructions->elements; -+ struct vkd3d_shader_dst_param *mul_dst; -+ -+ if (!(mad->flags & VKD3DSI_PRECISE_XYZW)) -+ return VKD3D_OK; -+ -+ if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (*tmp_idx == ~0u) -+ *tmp_idx = program->temp_count++; -+ -+ mul_ins = &instructions->elements[pos]; -+ add_ins = &instructions->elements[pos + 1]; -+ -+ mul_ins->handler_idx = VKD3DSIH_MUL; -+ mul_ins->src_count = 2; -+ -+ if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ add_ins->flags = mul_ins->flags & VKD3DSI_PRECISE_XYZW; -+ -+ mul_dst = mul_ins->dst; -+ *add_ins->dst = *mul_dst; -+ -+ mul_dst->modifiers = 0; -+ vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, mul_ins->src[0].reg.data_type, 1); -+ mul_dst->reg.dimension = add_ins->dst->reg.dimension; -+ mul_dst->reg.idx[0].offset = *tmp_idx; -+ -+ add_ins->src[0].reg = mul_dst->reg; -+ add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask); -+ add_ins->src[0].modifiers = 0; -+ add_ins->src[1] = mul_ins->src[2]; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ unsigned int tmp_idx = ~0u, i; -+ enum vkd3d_result ret; -+ -+ for (i = 0; i < instructions->count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &instructions->elements[i]; -+ -+ switch (ins->handler_idx) -+ { -+ case VKD3DSIH_TEXKILL: -+ if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0) -+ return ret; 
-+ break; - -- /* Make the original instruction no-op */ -- vkd3d_shader_instruction_make_nop(texkill_ins); -+ case VKD3DSIH_MAD: -+ if ((ret = vsir_program_lower_precise_mad(program, ins, &tmp_idx)) < 0) -+ return ret; -+ break; -+ -+ case VKD3DSIH_DCL_TEMPS: -+ vkd3d_shader_instruction_make_nop(ins); -+ break; -+ -+ default: -+ break; -+ } - } - - return VKD3D_OK; -@@ -2577,97 +2643,6 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) - } - } - -- /* Second subpass: creating new blocks might have broken -- * references in PHI instructions, so we use the block map to fix -- * them. */ -- current_label = 0; -- for (i = 0; i < ins_count; ++i) -- { -- struct vkd3d_shader_instruction *ins = &instructions[i]; -- struct vkd3d_shader_src_param *new_src; -- unsigned int j, l, new_src_count = 0; -- -- switch (ins->handler_idx) -- { -- case VKD3DSIH_LABEL: -- current_label = label_from_src_param(&ins->src[0]); -- continue; -- -- case VKD3DSIH_PHI: -- break; -- -- default: -- continue; -- } -- -- /* First count how many source parameters we need. */ -- for (j = 0; j < ins->src_count; j += 2) -- { -- unsigned int source_label = label_from_src_param(&ins->src[j + 1]); -- size_t k, match_count = 0; -- -- for (k = 0; k < map_count; ++k) -- { -- struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; -- -- if (mapping->switch_label == source_label && mapping->target_label == current_label) -- match_count += 1; -- } -- -- new_src_count += (match_count != 0) ? 2 * match_count : 2; -- } -- -- assert(new_src_count >= ins->src_count); -- -- /* Allocate more source parameters if needed. */ -- if (new_src_count == ins->src_count) -- { -- new_src = ins->src; -- } -- else -- { -- if (!(new_src = vsir_program_get_src_params(program, new_src_count))) -- { -- ERR("Failed to allocate %u source parameters.\n", new_src_count); -- goto fail; -- } -- } -- -- /* Then do the copy. 
*/ -- for (j = 0, l = 0; j < ins->src_count; j += 2) -- { -- unsigned int source_label = label_from_src_param(&ins->src[j + 1]); -- size_t k, match_count = 0; -- -- for (k = 0; k < map_count; ++k) -- { -- struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; -- -- if (mapping->switch_label == source_label && mapping->target_label == current_label) -- { -- match_count += 1; -- -- new_src[l] = ins->src[j]; -- new_src[l + 1] = ins->src[j + 1]; -- new_src[l + 1].reg.idx[0].offset = mapping->if_label; -- l += 2; -- } -- } -- -- if (match_count == 0) -- { -- new_src[l] = ins->src[j]; -- new_src[l + 1] = ins->src[j + 1]; -- l += 2; -- } -- } -- -- assert(l == new_src_count); -- -- ins->src_count = new_src_count; -- ins->src = new_src; -- } -- - vkd3d_free(program->instructions.elements); - vkd3d_free(block_map); - program->instructions.elements = instructions; -@@ -2685,148 +2660,139 @@ fail: - return VKD3D_ERROR_OUT_OF_MEMORY; - } - --static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, -- struct vkd3d_shader_src_param *src); -+struct ssas_to_temps_alloc -+{ -+ unsigned int *table; -+ unsigned int next_temp_idx; -+}; - --/* This is idempotent: it can be safely applied more than once on the -- * same register. 
*/ --static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg) -+static bool ssas_to_temps_alloc_init(struct ssas_to_temps_alloc *alloc, unsigned int ssa_count, unsigned int temp_count) - { -- unsigned int i; -+ size_t i = ssa_count * sizeof(*alloc->table); - -- if (reg->type == VKD3DSPR_SSA) -+ if (!(alloc->table = vkd3d_malloc(i))) - { -- reg->type = VKD3DSPR_TEMP; -- reg->idx[0].offset += program->temp_count; -+ ERR("Failed to allocate SSA table.\n"); -+ return false; - } -+ memset(alloc->table, 0xff, i); - -- for (i = 0; i < reg->idx_count; ++i) -- if (reg->idx[i].rel_addr) -- materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr); --} -- --static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program, -- struct vkd3d_shader_dst_param *dst) --{ -- materialize_ssas_to_temps_process_reg(program, &dst->reg); --} -- --static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, -- struct vkd3d_shader_src_param *src) --{ -- materialize_ssas_to_temps_process_reg(program, &src->reg); -+ alloc->next_temp_idx = temp_count; -+ return true; - } - --static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins, -- unsigned int label) -+/* This is idempotent: it can be safely applied more than once on the -+ * same register. 
*/ -+static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc, -+ struct vkd3d_shader_register *reg) - { - unsigned int i; - -- assert(ins->handler_idx == VKD3DSIH_PHI); -- -- for (i = 0; i < ins->src_count; i += 2) -+ if (reg->type == VKD3DSPR_SSA && alloc->table[reg->idx[0].offset] != UINT_MAX) - { -- if (label_from_src_param(&ins->src[i + 1]) == label) -- return &ins->src[i]; -+ reg->type = VKD3DSPR_TEMP; -+ reg->idx[0].offset = alloc->table[reg->idx[0].offset]; - } - -- vkd3d_unreachable(); -+ for (i = 0; i < reg->idx_count; ++i) -+ if (reg->idx[i].rel_addr) -+ materialize_ssas_to_temps_process_reg(program, alloc, ®->idx[i].rel_addr->reg); - } - --static bool materialize_ssas_to_temps_synthesize_mov(struct vsir_program *program, -- struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc, -- const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond, -- const struct vkd3d_shader_src_param *source, bool invert) -+struct ssas_to_temps_block_info - { -- struct vkd3d_shader_src_param *src; -- struct vkd3d_shader_dst_param *dst; -- -- if (!vsir_instruction_init_with_params(program, instruction, loc, -- cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 
3 : 1)) -- return false; -- -- dst = instruction->dst; -- src = instruction->src; -- -- dst[0] = *dest; -- materialize_ssas_to_temps_process_dst_param(program, &dst[0]); -+ struct phi_incoming_to_temp -+ { -+ struct vkd3d_shader_src_param *src; -+ struct vkd3d_shader_dst_param *dst; -+ } *incomings; -+ size_t incoming_capacity; -+ size_t incoming_count; -+}; - -- assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0); -- assert(dst[0].modifiers == 0); -- assert(dst[0].shift == 0); -+static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *block_info, -+ size_t count) -+{ -+ size_t i; - -- if (cond) -- { -- src[0] = *cond; -- src[1 + invert] = *source; -- memset(&src[2 - invert], 0, sizeof(src[2 - invert])); -- src[2 - invert].reg = dst[0].reg; -- materialize_ssas_to_temps_process_src_param(program, &src[1]); -- materialize_ssas_to_temps_process_src_param(program, &src[2]); -- } -- else -- { -- src[0] = *source; -- materialize_ssas_to_temps_process_src_param(program, &src[0]); -- } -+ for (i = 0; i < count; ++i) -+ vkd3d_free(block_info[i].incomings); - -- return true; -+ vkd3d_free(block_info); - } - --static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) -+static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program) - { -+ size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; -+ struct ssas_to_temps_block_info *info, *block_info = NULL; - struct vkd3d_shader_instruction *instructions = NULL; -- struct materialize_ssas_to_temps_block_data -- { -- size_t phi_begin; -- size_t phi_count; -- } *block_index = NULL; -- size_t ins_capacity = 0, ins_count = 0, i; -+ struct ssas_to_temps_alloc alloc = {0}; - unsigned int current_label = 0; - -- if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) -- goto fail; -- -- if (!(block_index = vkd3d_calloc(program->block_count, sizeof(*block_index)))) -+ if (!(block_info = 
vkd3d_calloc(program->block_count, sizeof(*block_info)))) - { -- ERR("Failed to allocate block index.\n"); -+ ERR("Failed to allocate block info array.\n"); - goto fail; - } - -- for (i = 0; i < program->instructions.count; ++i) -+ if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) -+ goto fail; -+ -+ for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ unsigned int j, temp_idx; - -- switch (ins->handler_idx) -+ /* Only phi src/dst SSA values need be converted here. Structurisation may -+ * introduce new cases of undominated SSA use, which will be handled later. */ -+ if (ins->handler_idx != VKD3DSIH_PHI) -+ continue; -+ ++phi_count; -+ -+ temp_idx = alloc.next_temp_idx++; -+ -+ for (j = 0; j < ins->src_count; j += 2) - { -- case VKD3DSIH_LABEL: -- current_label = label_from_src_param(&ins->src[0]); -- break; -+ struct phi_incoming_to_temp *incoming; -+ unsigned int label; - -- case VKD3DSIH_PHI: -- assert(current_label != 0); -- assert(i != 0); -- if (block_index[current_label - 1].phi_begin == 0) -- block_index[current_label - 1].phi_begin = i; -- block_index[current_label - 1].phi_count += 1; -- break; -+ label = label_from_src_param(&ins->src[j + 1]); -+ assert(label); - -- default: -- current_label = 0; -- break; -+ info = &block_info[label - 1]; -+ -+ if (!(vkd3d_array_reserve((void **)&info->incomings, &info->incoming_capacity, info->incoming_count + 1, -+ sizeof(*info->incomings)))) -+ goto fail; -+ -+ incoming = &info->incomings[info->incoming_count++]; -+ incoming->src = &ins->src[j]; -+ incoming->dst = ins->dst; -+ -+ alloc.table[ins->dst->reg.idx[0].offset] = temp_idx; -+ -+ ++incoming_count; - } -+ -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg); - } - -+ if (!phi_count) -+ goto done; -+ -+ if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + 
incoming_count - phi_count)) -+ goto fail; -+ - for (i = 0; i < program->instructions.count; ++i) - { -- struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ struct vkd3d_shader_instruction *mov_ins, *ins = &program->instructions.elements[i]; - size_t j; - - for (j = 0; j < ins->dst_count; ++j) -- materialize_ssas_to_temps_process_dst_param(program, &ins->dst[j]); -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); - - for (j = 0; j < ins->src_count; ++j) -- materialize_ssas_to_temps_process_src_param(program, &ins->src[j]); -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); - - switch (ins->handler_idx) - { -@@ -2835,65 +2801,21 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog - break; - - case VKD3DSIH_BRANCH: -- { -- if (vsir_register_is_label(&ins->src[0].reg)) -- { -- const struct materialize_ssas_to_temps_block_data *data = &block_index[label_from_src_param(&ins->src[0]) - 1]; -- -- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + data->phi_count)) -- goto fail; -- -- for (j = data->phi_begin; j < data->phi_begin + data->phi_count; ++j) -- { -- const struct vkd3d_shader_src_param *source; -- -- source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], -- current_label); -- if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], -- &ins->location, &program->instructions.elements[j].dst[0], NULL, source, false)) -- goto fail; -+ case VKD3DSIH_SWITCH_MONOLITHIC: -+ info = &block_info[current_label - 1]; - -- ++ins_count; -- } -- } -- else -+ for (j = 0; j < info->incoming_count; ++j) - { -- struct materialize_ssas_to_temps_block_data *data_true = &block_index[label_from_src_param(&ins->src[1]) - 1], -- *data_false = &block_index[label_from_src_param(&ins->src[2]) - 1]; -- const struct vkd3d_shader_src_param *cond = &ins->src[0]; -+ struct phi_incoming_to_temp *incoming = 
&info->incomings[j]; - -- if (!reserve_instructions(&instructions, &ins_capacity, -- ins_count + data_true->phi_count + data_false->phi_count)) -+ mov_ins = &instructions[ins_count++]; -+ if (!vsir_instruction_init_with_params(program, mov_ins, &ins->location, VKD3DSIH_MOV, 1, 0)) - goto fail; -- -- for (j = data_true->phi_begin; j < data_true->phi_begin + data_true->phi_count; ++j) -- { -- const struct vkd3d_shader_src_param *source; -- -- source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], -- current_label); -- if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], -- &ins->location, &program->instructions.elements[j].dst[0], cond, source, false)) -- goto fail; -- -- ++ins_count; -- } -- -- for (j = data_false->phi_begin; j < data_false->phi_begin + data_false->phi_count; ++j) -- { -- const struct vkd3d_shader_src_param *source; -- -- source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], -- current_label); -- if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], -- &ins->location, &program->instructions.elements[j].dst[0], cond, source, true)) -- goto fail; -- -- ++ins_count; -- } -+ *mov_ins->dst = *incoming->dst; -+ mov_ins->src = incoming->src; -+ mov_ins->src_count = 1; - } - break; -- } - - case VKD3DSIH_PHI: - continue; -@@ -2902,25 +2824,24 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog - break; - } - -- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) -- goto fail; -- - instructions[ins_count++] = *ins; - } - - vkd3d_free(program->instructions.elements); -- vkd3d_free(block_index); - program->instructions.elements = instructions; - program->instructions.capacity = ins_capacity; - program->instructions.count = ins_count; -- program->temp_count += program->ssa_count; -- program->ssa_count = 0; -+ program->temp_count = alloc.next_temp_idx; -+done: -+ 
ssas_to_temps_block_info_cleanup(block_info, program->block_count); -+ vkd3d_free(alloc.table); - - return VKD3D_OK; - - fail: - vkd3d_free(instructions); -- vkd3d_free(block_index); -+ ssas_to_temps_block_info_cleanup(block_info, program->block_count); -+ vkd3d_free(alloc.table); - - return VKD3D_ERROR_OUT_OF_MEMORY; - } -@@ -3061,19 +2982,19 @@ struct vsir_cfg_structure - union - { - struct vsir_block *block; -- struct -+ struct vsir_cfg_structure_loop - { - struct vsir_cfg_structure_list body; - unsigned idx; - } loop; -- struct -+ struct vsir_cfg_structure_selection - { - struct vkd3d_shader_src_param *condition; - struct vsir_cfg_structure_list if_body; - struct vsir_cfg_structure_list else_body; - bool invert_condition; - } selection; -- struct -+ struct vsir_cfg_structure_jump - { - enum vsir_cfg_jump_type - { -@@ -3157,6 +3078,14 @@ static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) - } - } - -+struct vsir_cfg_emit_target -+{ -+ struct vkd3d_shader_instruction *instructions; -+ size_t ins_capacity, ins_count; -+ unsigned int jump_target_temp_idx; -+ unsigned int temp_count; -+}; -+ - struct vsir_cfg - { - struct vkd3d_shader_message_context *message_context; -@@ -3200,15 +3129,15 @@ struct vsir_cfg - * block), but we still try to keep `begin' as forward as - * possible, to keep the loop scope as small as possible. */ - bool synthetic; -+ /* The number of jump instructions (both conditional and -+ * unconditional) that target this loop. 
*/ -+ unsigned int target_count; - } *loop_intervals; - size_t loop_interval_count, loop_interval_capacity; - - struct vsir_cfg_structure_list structured_program; - -- struct vkd3d_shader_instruction *instructions; -- size_t ins_capacity, ins_count; -- unsigned int jump_target_temp_idx; -- unsigned int temp_count; -+ struct vsir_cfg_emit_target *target; - }; - - static void vsir_cfg_cleanup(struct vsir_cfg *cfg) -@@ -3248,6 +3177,7 @@ static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsign - interval->begin = begin; - interval->end = end; - interval->synthetic = synthetic; -+ interval->target_count = 0; - - return VKD3D_OK; - } -@@ -3402,7 +3332,7 @@ static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg) - } - - static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context) -+ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) - { - struct vsir_block *current_block = NULL; - enum vkd3d_result ret; -@@ -3412,6 +3342,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - cfg->message_context = message_context; - cfg->program = program; - cfg->block_count = program->block_count; -+ cfg->target = target; - - vsir_block_list_init(&cfg->order); - -@@ -4250,53 +4181,157 @@ fail: - return VKD3D_ERROR_OUT_OF_MEMORY; - } - --static void vsir_cfg_remove_trailing_continue(struct vsir_cfg_structure_list *list, unsigned int target) -+static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list, unsigned int target) - { - struct vsir_cfg_structure *last = &list->structures[list->count - 1]; - - if (last->type == STRUCTURE_TYPE_JUMP && last->u.jump.type == JUMP_CONTINUE - && !last->u.jump.condition && last->u.jump.target == target) -+ { - --list->count; -+ assert(cfg->loop_intervals[target].target_count > 0); -+ 
--cfg->loop_intervals[target].target_count; -+ } - } - --static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg_structure_list *list) -+static struct vsir_cfg_structure *vsir_cfg_get_trailing_break(struct vsir_cfg_structure_list *list) - { -- enum vkd3d_result ret; -- size_t i; -+ struct vsir_cfg_structure *structure; -+ size_t count = list->count; - -- for (i = 0; i < list->count; ++i) -- { -- struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; -+ if (count == 0) -+ return NULL; - -- if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) -- continue; -+ structure = &list->structures[count - 1]; - -- vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); -- new_selection.u.selection.condition = structure->u.jump.condition; -- new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; -+ if (structure->type != STRUCTURE_TYPE_JUMP || structure->u.jump.type != JUMP_BREAK -+ || structure->u.jump.condition) -+ return NULL; - -- if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, -- STRUCTURE_TYPE_JUMP))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -- new_jump->u.jump.type = structure->u.jump.type; -- new_jump->u.jump.target = structure->u.jump.target; -+ return structure; -+} - -- /* Move the rest of the structure list in the else branch -- * rather than leaving it after the selection construct. The -- * reason is that this is more conducive to further -- * optimization, because all the conditional `break's appear -- * as the last instruction of a branch of a cascade of -- * selection constructs at the end of the structure list we're -- * processing, instead of being buried in the middle of the -- * structure list itself. 
*/ -- if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, -- &list->structures[i + 1], list->count - i - 1)) < 0) -- return ret; -+/* When the last instruction in both branches of a selection construct -+ * is an unconditional break, any of them can be moved after the -+ * selection construct. If they break the same loop both of them can -+ * be moved out, otherwise we can choose which one: we choose the one -+ * that breaks the innermost loop, because we hope to eventually -+ * remove the loop itself. -+ * -+ * In principle a similar movement could be done when the last -+ * instructions are continue and continue, or continue and break. But -+ * in practice I don't think those situations can happen given the -+ * previous passes we do on the program, so we don't care. */ -+static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list) -+{ -+ struct vsir_cfg_structure *selection, *if_break, *else_break, *new_break; -+ unsigned int if_target, else_target, max_target; -+ size_t pos = list->count - 1; -+ -+ selection = &list->structures[pos]; -+ assert(selection->type == STRUCTURE_TYPE_SELECTION); -+ -+ if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body); -+ else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body); -+ -+ if (!if_break || !else_break) -+ return VKD3D_OK; -+ -+ if_target = if_break->u.jump.target; -+ else_target = else_break->u.jump.target; -+ max_target = max(if_target, else_target); -+ -+ if (!(new_break = vsir_cfg_structure_list_append(list, STRUCTURE_TYPE_JUMP))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ new_break->u.jump.type = JUMP_BREAK; -+ new_break->u.jump.target = max_target; -+ ++cfg->loop_intervals[max_target].target_count; -+ -+ /* Pointer `selection' could have been invalidated by the append -+ * operation. 
*/ -+ selection = &list->structures[pos]; -+ assert(selection->type == STRUCTURE_TYPE_SELECTION); -+ -+ if (if_target == max_target) -+ { -+ --selection->u.selection.if_body.count; -+ assert(cfg->loop_intervals[if_target].target_count > 0); -+ --cfg->loop_intervals[if_target].target_count; -+ } -+ -+ if (else_target == max_target) -+ { -+ --selection->u.selection.else_body.count; -+ assert(cfg->loop_intervals[else_target].target_count > 0); -+ --cfg->loop_intervals[else_target].target_count; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections_recursively(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list) -+{ -+ struct vsir_cfg_structure *trailing; -+ -+ if (list->count == 0) -+ return VKD3D_OK; -+ -+ trailing = &list->structures[list->count - 1]; -+ -+ if (trailing->type != STRUCTURE_TYPE_SELECTION) -+ return VKD3D_OK; -+ -+ vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.if_body); -+ vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.else_body); -+ -+ return vsir_cfg_move_breaks_out_of_selections(cfg, list); -+} -+ -+static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list) -+{ -+ enum vkd3d_result ret; -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; -+ -+ if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) -+ continue; -+ -+ vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); -+ new_selection.u.selection.condition = structure->u.jump.condition; -+ new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; -+ -+ if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, -+ STRUCTURE_TYPE_JUMP))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ new_jump->u.jump.type = structure->u.jump.type; -+ 
new_jump->u.jump.target = structure->u.jump.target; -+ -+ /* Move the rest of the structure list in the else branch -+ * rather than leaving it after the selection construct. The -+ * reason is that this is more conducive to further -+ * optimization, because all the conditional `break's appear -+ * as the last instruction of a branch of a cascade of -+ * selection constructs at the end of the structure list we're -+ * processing, instead of being buried in the middle of the -+ * structure list itself. */ -+ if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, -+ &list->structures[i + 1], list->count - i - 1)) < 0) -+ return ret; - - *structure = new_selection; - list->count = i + 1; - -- if ((ret = vsir_cfg_synthesize_selections(&structure->u.selection.else_body)) < 0) -+ if ((ret = vsir_cfg_synthesize_selections(cfg, &structure->u.selection.else_body)) < 0) -+ return ret; -+ -+ if ((ret = vsir_cfg_move_breaks_out_of_selections(cfg, list)) < 0) - return ret; - - break; -@@ -4305,40 +4340,164 @@ static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg_structur - return VKD3D_OK; - } - -+static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *new_list, struct vsir_cfg_structure *loop) -+{ -+ struct vsir_cfg_structure_list *loop_body = &loop->u.loop.body; -+ unsigned int target, loop_idx = loop->u.loop.idx; -+ struct vsir_cfg_structure *trailing_break; -+ enum vkd3d_result ret; -+ -+ trailing_break = vsir_cfg_get_trailing_break(loop_body); -+ -+ /* If the loop's last instruction is not a break, we cannot remove -+ * the loop itself. 
*/ -+ if (!trailing_break) -+ { -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) -+ return ret; -+ memset(loop, 0, sizeof(*loop)); -+ return VKD3D_OK; -+ } -+ -+ target = trailing_break->u.jump.target; -+ assert(cfg->loop_intervals[target].target_count > 0); -+ -+ /* If the loop is not targeted by any jump, we can remove it. The -+ * trailing `break' then targets another loop, so we have to keep -+ * it. */ -+ if (cfg->loop_intervals[loop_idx].target_count == 0) -+ { -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, -+ &loop_body->structures[0], loop_body->count)) < 0) -+ return ret; -+ loop_body->count = 0; -+ return VKD3D_OK; -+ } -+ -+ /* If the loop is targeted only by its own trailing `break' -+ * instruction, then we can remove it together with the `break' -+ * itself. */ -+ if (target == loop_idx && cfg->loop_intervals[loop_idx].target_count == 1) -+ { -+ --cfg->loop_intervals[loop_idx].target_count; -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, -+ &loop_body->structures[0], loop_body->count - 1)) < 0) -+ return ret; -+ loop_body->count = 0; -+ return VKD3D_OK; -+ } -+ -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) -+ return ret; -+ memset(loop, 0, sizeof(*loop)); -+ -+ return VKD3D_OK; -+} -+ - static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) - { -+ struct vsir_cfg_structure_list old_list = *list, *new_list = list; - enum vkd3d_result ret; - size_t i; - -- for (i = 0; i < list->count; ++i) -+ memset(new_list, 0, sizeof(*new_list)); -+ -+ for (i = 0; i < old_list.count; ++i) - { -- struct vsir_cfg_structure *loop = &list->structures[i]; -+ struct vsir_cfg_structure *loop = &old_list.structures[i], *selection; - struct vsir_cfg_structure_list *loop_body; - - if (loop->type != STRUCTURE_TYPE_LOOP) -+ { -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) -+ goto out; 
-+ memset(loop, 0, sizeof(*loop)); - continue; -+ } - - loop_body = &loop->u.loop.body; - - if (loop_body->count == 0) -+ { -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) -+ goto out; -+ memset(loop, 0, sizeof(*loop)); - continue; -+ } - -- vsir_cfg_remove_trailing_continue(loop_body, loop->u.loop.idx); -+ vsir_cfg_remove_trailing_continue(cfg, loop_body, loop->u.loop.idx); - - if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0) -- return ret; -+ goto out; - -- if ((ret = vsir_cfg_synthesize_selections(loop_body)) < 0) -- return ret; -+ if ((ret = vsir_cfg_synthesize_selections(cfg, loop_body)) < 0) -+ goto out; -+ -+ if ((ret = vsir_cfg_append_loop(cfg, new_list, loop)) < 0) -+ goto out; -+ -+ /* If the last pushed instruction is a selection and one of the branches terminates with a -+ * `break', start pushing to the other branch, in the hope of eventually push a `break' -+ * there too and be able to remove a loop. */ -+ if (new_list->count == 0) -+ continue; -+ -+ selection = &new_list->structures[new_list->count - 1]; -+ -+ if (selection->type == STRUCTURE_TYPE_SELECTION) -+ { -+ if (vsir_cfg_get_trailing_break(&selection->u.selection.if_body)) -+ new_list = &selection->u.selection.else_body; -+ else if (vsir_cfg_get_trailing_break(&selection->u.selection.else_body)) -+ new_list = &selection->u.selection.if_body; -+ } - } - -- return VKD3D_OK; -+ ret = vsir_cfg_move_breaks_out_of_selections_recursively(cfg, list); -+ -+out: -+ vsir_cfg_structure_list_cleanup(&old_list); -+ -+ return ret; -+} -+ -+static void vsir_cfg_count_targets(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) -+{ -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i]; -+ -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_BLOCK: -+ break; -+ -+ case STRUCTURE_TYPE_LOOP: -+ vsir_cfg_count_targets(cfg, &structure->u.loop.body); -+ break; -+ -+ case STRUCTURE_TYPE_SELECTION: 
-+ vsir_cfg_count_targets(cfg, &structure->u.selection.if_body); -+ vsir_cfg_count_targets(cfg, &structure->u.selection.else_body); -+ break; -+ -+ case STRUCTURE_TYPE_JUMP: -+ if (structure->u.jump.type == JUMP_BREAK || structure->u.jump.type == JUMP_CONTINUE) -+ ++cfg->loop_intervals[structure->u.jump.target].target_count; -+ break; -+ } -+ } - } - - static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) - { - enum vkd3d_result ret; - -+ vsir_cfg_count_targets(cfg, &cfg->structured_program); -+ - ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program); - - if (TRACE_ON()) -@@ -4348,199 +4507,244 @@ static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) - } - - static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, -- struct vsir_cfg_structure_list *list, unsigned int loop_idx) -+ struct vsir_cfg_structure_list *list, unsigned int loop_idx); -+ -+static enum vkd3d_result vsir_cfg_structure_list_emit_block(struct vsir_cfg *cfg, -+ struct vsir_block *block) -+{ -+ struct vsir_cfg_emit_target *target = cfg->target; -+ -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, -+ target->ins_count + (block->end - block->begin))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ memcpy(&target->instructions[target->ins_count], block->begin, -+ (char *)block->end - (char *)block->begin); -+ -+ target->ins_count += block->end - block->begin; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_loop *loop, unsigned int loop_idx) - { -+ struct vsir_cfg_emit_target *target = cfg->target; - const struct vkd3d_shader_location no_loc = {0}; - enum vkd3d_result ret; -- size_t i; - -- for (i = 0; i < list->count; ++i) -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, 
VKD3DSIH_LOOP); -+ -+ if ((ret = vsir_cfg_structure_list_emit(cfg, &loop->body, loop->idx)) < 0) -+ return ret; -+ -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 5)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); -+ -+ /* Add a trampoline to implement multilevel jumping depending on the stored -+ * jump_target value. */ -+ if (loop_idx != UINT_MAX) - { -- struct vsir_cfg_structure *structure = &list->structures[i]; -+ /* If the multilevel jump is a `continue' and the target is the loop we're inside -+ * right now, then we can finally do the `continue'. */ -+ const unsigned int outer_continue_target = loop_idx << 1 | 1; -+ /* If the multilevel jump is a `continue' to any other target, or if it is a `break' -+ * and the target is not the loop we just finished emitting, then it means that -+ * we have to reach an outer loop, so we keep breaking. */ -+ const unsigned int inner_break_target = loop->idx << 1; -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_IEQ, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- switch (structure->type) -- { -- case STRUCTURE_TYPE_BLOCK: -- { -- struct vsir_block *block = structure->u.block; -+ dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); -+ src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); -+ src_param_init_const_uint(&target->instructions[target->ins_count].src[1], outer_continue_target); - -- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + (block->end - block->begin))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ++target->ins_count; - -- memcpy(&cfg->instructions[cfg->ins_count], block->begin, (char *)block->end - (char *)block->begin); -+ if (!vsir_instruction_init_with_params(cfg->program, 
&target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- cfg->ins_count += block->end - block->begin; -- break; -- } -+ src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); - -- case STRUCTURE_TYPE_LOOP: -- { -- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ++target->ins_count; -+ ++target->temp_count; - -- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_LOOP); -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_IEQ, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.loop.body, structure->u.loop.idx)) < 0) -- return ret; -+ dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); -+ src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); -+ src_param_init_const_uint(&target->instructions[target->ins_count].src[1], inner_break_target); - -- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 5)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ++target->ins_count; - -- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_BREAKP, 0, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; - -- /* Add a trampoline to implement multilevel jumping depending on the stored -- * jump_target value. */ -- if (loop_idx != UINT_MAX) -- { -- /* If the multilevel jump is a `continue' and the target is the loop we're inside -- * right now, then we can finally do the `continue'. 
*/ -- const unsigned int outer_continue_target = loop_idx << 1 | 1; -- /* If the multilevel jump is a `continue' to any other target, or if it is a `break' -- * and the target is not the loop we just finished emitting, then it means that -- * we have to reach an outer loop, so we keep breaking. */ -- const unsigned int inner_break_target = structure->u.loop.idx << 1; -+ src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); - -- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -- &no_loc, VKD3DSIH_IEQ, 1, 2)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ++target->ins_count; -+ ++target->temp_count; -+ } - -- dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); -- src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); -- src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], outer_continue_target); -+ return VKD3D_OK; -+} - -- ++cfg->ins_count; -+static enum vkd3d_result vsir_cfg_structure_list_emit_selection(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_selection *selection, unsigned int loop_idx) -+{ -+ struct vsir_cfg_emit_target *target = cfg->target; -+ const struct vkd3d_shader_location no_loc = {0}; -+ enum vkd3d_result ret; - -- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -- &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_IF, 0, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- ++cfg->ins_count; -- ++cfg->temp_count; -+ target->instructions[target->ins_count].src[0] = 
*selection->condition; - -- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -- &no_loc, VKD3DSIH_IEQ, 1, 2)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ if (selection->invert_condition) -+ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; - -- dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); -- src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); -- src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], inner_break_target); -+ ++target->ins_count; - -- ++cfg->ins_count; -+ if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->if_body, loop_idx)) < 0) -+ return ret; - -- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -- &no_loc, VKD3DSIH_BREAKP, 0, 1)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -- cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; -+ if (selection->else_body.count != 0) -+ { -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); -+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ELSE); - -- ++cfg->ins_count; -- ++cfg->temp_count; -- } -+ if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->else_body, loop_idx)) < 0) -+ return ret; -+ } - -- break; -- } -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- case STRUCTURE_TYPE_SELECTION: -- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDIF); - -- if (!vsir_instruction_init_with_params(cfg->program, 
&cfg->instructions[cfg->ins_count], &no_loc, -- VKD3DSIH_IF, 0, 1)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ return VKD3D_OK; -+} - -- cfg->instructions[cfg->ins_count].src[0] = *structure->u.selection.condition; -+static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_jump *jump, unsigned int loop_idx) -+{ -+ struct vsir_cfg_emit_target *target = cfg->target; -+ const struct vkd3d_shader_location no_loc = {0}; -+ /* Encode the jump target as the loop index plus a bit to remember whether -+ * we're breaking or continueing. */ -+ unsigned int jump_target = jump->target << 1; -+ enum vkd3d_shader_opcode opcode; - -- if (structure->u.selection.invert_condition) -- cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; -+ switch (jump->type) -+ { -+ case JUMP_CONTINUE: -+ /* If we're continueing the loop we're directly inside, then we can emit a -+ * `continue'. Otherwise we first have to break all the loops between here -+ * and the loop to continue, recording our intention to continue -+ * in the lowest bit of jump_target. */ -+ if (jump->target == loop_idx) -+ { -+ opcode = jump->condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; -+ break; -+ } -+ jump_target |= 1; -+ /* fall through */ - -- ++cfg->ins_count; -+ case JUMP_BREAK: -+ opcode = jump->condition ? 
VKD3DSIH_BREAKP : VKD3DSIH_BREAK; -+ break; - -- if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.if_body, loop_idx)) < 0) -- return ret; -+ case JUMP_RET: -+ assert(!jump->condition); -+ opcode = VKD3DSIH_RET; -+ break; - -- if (structure->u.selection.else_body.count != 0) -- { -- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ default: -+ vkd3d_unreachable(); -+ } - -- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ELSE); -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.selection.else_body, loop_idx)) < 0) -- return ret; -- } -+ if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) -+ { -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_MOV, 1, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ dst_param_init_temp_uint(&target->instructions[target->ins_count].dst[0], target->jump_target_temp_idx); -+ src_param_init_const_uint(&target->instructions[target->ins_count].src[0], jump_target); - -- vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDIF); -- break; -+ ++target->ins_count; -+ } - -- case STRUCTURE_TYPE_JUMP: -- { -- /* Encode the jump target as the loop index plus a bit to remember whether -- * we're breaking or continueing. 
*/ -- unsigned int jump_target = structure->u.jump.target << 1; -- enum vkd3d_shader_opcode opcode; -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, opcode, 0, !!jump->condition)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- switch (structure->u.jump.type) -- { -- case JUMP_CONTINUE: -- /* If we're continueing the loop we're directly inside, then we can emit a -- * `continue'. Otherwise we first have to break all the loops between here -- * and the loop to continue, recording our intention to continue -- * in the lowest bit of jump_target. */ -- if (structure->u.jump.target == loop_idx) -- { -- opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; -- break; -- } -- jump_target |= 1; -- /* fall through */ -- -- case JUMP_BREAK: -- opcode = structure->u.jump.condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; -- break; -- -- case JUMP_RET: -- assert(!structure->u.jump.condition); -- opcode = VKD3DSIH_RET; -- break; -- -- default: -- vkd3d_unreachable(); -- } -+ if (jump->invert_condition) -+ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; - -- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 2)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ if (jump->condition) -+ target->instructions[target->ins_count].src[0] = *jump->condition; - -- if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) -- { -- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -- &no_loc, VKD3DSIH_MOV, 1, 1)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ++target->ins_count; - -- dst_param_init_temp_uint(&cfg->instructions[cfg->ins_count].dst[0], cfg->jump_target_temp_idx); -- src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[0], jump_target); -+ return VKD3D_OK; -+} - -- ++cfg->ins_count; -- } -+static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list, 
unsigned int loop_idx) -+{ -+ enum vkd3d_result ret; -+ size_t i; - -- if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], -- &no_loc, opcode, 0, !!structure->u.jump.condition)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i]; - -- if (structure->u.jump.invert_condition) -- cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_BLOCK: -+ if ((ret = vsir_cfg_structure_list_emit_block(cfg, structure->u.block)) < 0) -+ return ret; -+ break; - -- if (structure->u.jump.condition) -- cfg->instructions[cfg->ins_count].src[0] = *structure->u.jump.condition; -+ case STRUCTURE_TYPE_LOOP: -+ if ((ret = vsir_cfg_structure_list_emit_loop(cfg, &structure->u.loop, loop_idx)) < 0) -+ return ret; -+ break; - -- ++cfg->ins_count; -+ case STRUCTURE_TYPE_SELECTION: -+ if ((ret = vsir_cfg_structure_list_emit_selection(cfg, &structure->u.selection, -+ loop_idx)) < 0) -+ return ret; -+ break; -+ -+ case STRUCTURE_TYPE_JUMP: -+ if ((ret = vsir_cfg_structure_list_emit_jump(cfg, &structure->u.jump, -+ loop_idx)) < 0) -+ return ret; - break; -- } - - default: - vkd3d_unreachable(); -@@ -4551,40 +4755,191 @@ static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, - } - - static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) -+{ -+ return vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX); -+} -+ -+static enum vkd3d_result vsir_program_structurize_function(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) - { - enum vkd3d_result ret; -- size_t i; -+ struct vsir_cfg cfg; -+ -+ if ((ret = vsir_cfg_init(&cfg, program, message_context, target)) < 0) -+ return ret; - -- cfg->jump_target_temp_idx = cfg->program->temp_count; -- cfg->temp_count = 
cfg->program->temp_count + 1; -+ vsir_cfg_compute_dominators(&cfg); - -- if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->program->instructions.count)) -+ if ((ret = vsir_cfg_compute_loops(&cfg)) < 0) -+ goto out; -+ -+ if ((ret = vsir_cfg_sort_nodes(&cfg)) < 0) -+ goto out; -+ -+ if ((ret = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) -+ goto out; -+ -+ if ((ret = vsir_cfg_build_structured_program(&cfg)) < 0) -+ goto out; -+ -+ if ((ret = vsir_cfg_optimize(&cfg)) < 0) -+ goto out; -+ -+ ret = vsir_cfg_emit_structured_program(&cfg); -+ -+out: -+ vsir_cfg_cleanup(&cfg); -+ -+ return ret; -+} -+ -+static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ struct vsir_cfg_emit_target target = {0}; -+ enum vkd3d_result ret; -+ unsigned int i; -+ -+ target.jump_target_temp_idx = program->temp_count; -+ target.temp_count = program->temp_count + 1; -+ -+ if (!reserve_instructions(&target.instructions, &target.ins_capacity, program->instructions.count)) - return VKD3D_ERROR_OUT_OF_MEMORY; - - /* Copy declarations until the first block. 
*/ -- for (i = 0; i < cfg->program->instructions.count; ++i) -+ for (i = 0; i < program->instructions.count; ++i) - { -- struct vkd3d_shader_instruction *ins = &cfg->program->instructions.elements[i]; -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - - if (ins->handler_idx == VKD3DSIH_LABEL) - break; - -- cfg->instructions[cfg->ins_count++] = *ins; -+ target.instructions[target.ins_count++] = *ins; - } - -- if ((ret = vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX)) < 0) -+ if ((ret = vsir_program_structurize_function(program, message_context, &target)) < 0) - goto fail; - -- vkd3d_free(cfg->program->instructions.elements); -- cfg->program->instructions.elements = cfg->instructions; -- cfg->program->instructions.capacity = cfg->ins_capacity; -- cfg->program->instructions.count = cfg->ins_count; -- cfg->program->temp_count = cfg->temp_count; -+ vkd3d_free(program->instructions.elements); -+ program->instructions.elements = target.instructions; -+ program->instructions.capacity = target.ins_capacity; -+ program->instructions.count = target.ins_count; -+ program->temp_count = target.temp_count; - - return VKD3D_OK; - - fail: -- vkd3d_free(cfg->instructions); -+ vkd3d_free(target.instructions); -+ -+ return ret; -+} -+ -+static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc, -+ struct vsir_block *block, struct vsir_block **origin_blocks) -+{ -+ unsigned int i; -+ -+ if (!register_is_ssa(reg)) -+ return; -+ -+ i = reg->idx[0].offset; -+ if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) -+ alloc->table[i] = alloc->next_temp_idx++; -+ -+ for (i = 0; i < reg->idx_count; ++i) -+ if (reg->idx[i].rel_addr) -+ register_map_undominated_use(®->idx[i].rel_addr->reg, alloc, block, origin_blocks); -+} -+ -+/* Drivers are not necessarily optimised to handle very large numbers of temps. 
For example, -+ * using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV. -+ * This can also result in the backend emitting less code because temps typically need an -+ * access chain and a load/store. Conversion of phi SSA values to temps should eliminate all -+ * undominated SSA use, but structurisation may create new occurrences. */ -+static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg) -+{ -+ struct vsir_program *program = cfg->program; -+ struct ssas_to_temps_alloc alloc = {0}; -+ struct vsir_block **origin_blocks; -+ unsigned int j; -+ size_t i; -+ -+ if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks)))) -+ { -+ ERR("Failed to allocate origin block array.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) -+ { -+ vkd3d_free(origin_blocks); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ for (i = 0; i < cfg->block_count; ++i) -+ { -+ struct vsir_block *block = &cfg->blocks[i]; -+ struct vkd3d_shader_instruction *ins; -+ -+ for (ins = block->begin; ins <= block->end; ++ins) -+ { -+ for (j = 0; j < ins->dst_count; ++j) -+ { -+ if (register_is_ssa(&ins->dst[j].reg)) -+ origin_blocks[ins->dst[j].reg.idx[0].offset] = block; -+ } -+ } -+ } -+ -+ for (i = 0; i < cfg->block_count; ++i) -+ { -+ struct vsir_block *block = &cfg->blocks[i]; -+ struct vkd3d_shader_instruction *ins; -+ -+ for (ins = block->begin; ins <= block->end; ++ins) -+ { -+ for (j = 0; j < ins->src_count; ++j) -+ register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks); -+ } -+ } -+ -+ if (alloc.next_temp_idx == program->temp_count) -+ goto done; -+ -+ TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count); -+ -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ -+ for (j 
= 0; j < ins->dst_count; ++j) -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); -+ -+ for (j = 0; j < ins->src_count; ++j) -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); -+ } -+ -+ program->temp_count = alloc.next_temp_idx; -+done: -+ vkd3d_free(origin_blocks); -+ vkd3d_free(alloc.table); -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ enum vkd3d_result ret; -+ struct vsir_cfg cfg; -+ -+ if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL)) < 0) -+ return ret; -+ -+ vsir_cfg_compute_dominators(&cfg); -+ -+ ret = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg); -+ -+ vsir_cfg_cleanup(&cfg); - - return ret; - } -@@ -5459,63 +5814,25 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t - { - enum vkd3d_result result = VKD3D_OK; - -- remove_dcl_temps(program); -- -- if ((result = vsir_program_lower_texkills(program)) < 0) -+ if ((result = vsir_program_lower_instructions(program)) < 0) - return result; - - if (program->shader_version.major >= 6) - { -- struct vsir_cfg cfg; -- -- if ((result = lower_switch_to_if_ladder(program)) < 0) -- return result; -- -- if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0) -- return result; -- -- if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) -+ if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) - return result; - -- vsir_cfg_compute_dominators(&cfg); -- -- if ((result = vsir_cfg_compute_loops(&cfg)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -+ if ((result = lower_switch_to_if_ladder(program)) < 0) - return result; -- } - -- if ((result = vsir_cfg_sort_nodes(&cfg)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -+ if ((result = vsir_program_structurize(program, message_context)) < 0) - return result; -- } - -- if ((result = 
vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -+ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) - return result; -- } - -- if ((result = vsir_cfg_build_structured_program(&cfg)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -+ if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0) - return result; -- } -- -- if ((result = vsir_cfg_optimize(&cfg)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -- return result; -- } -- -- if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -- return result; -- } -- -- vsir_cfg_cleanup(&cfg); - } - else - { -@@ -5545,10 +5862,10 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t - - if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) - return result; -- } - -- if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) -- return result; -+ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) -+ return result; -+ } - - if (TRACE_ON()) - vkd3d_shader_trace(program); -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 1cae2d7d9d4..dfab1cb229b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -4419,11 +4419,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp - { - unsigned int component_count = vsir_write_mask_component_count(dst->write_mask); - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- uint32_t type_id, val_id; -+ uint32_t type_id, dst_type_id, val_id; - -+ type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - if (component_count > 1) - { -- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - val_id = vkd3d_spirv_build_op_composite_construct(builder, - 
type_id, component_ids, component_count); - } -@@ -4431,6 +4431,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp - { - val_id = *component_ids; - } -+ -+ dst_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); -+ if (dst_type_id != type_id) -+ val_id = vkd3d_spirv_build_op_bitcast(builder, dst_type_id, val_id); -+ - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } - -@@ -7283,8 +7288,12 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, - } - - general_implementation: -- write_mask = (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) -- ? vsir_write_mask_64_from_32(dst->write_mask) : dst->write_mask; -+ write_mask = dst->write_mask; -+ if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) -+ write_mask = vsir_write_mask_64_from_32(write_mask); -+ else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) -+ write_mask = vsir_write_mask_32_from_64(write_mask); -+ - val_id = spirv_compiler_emit_load_src(compiler, src, write_mask); - if (dst->reg.data_type != src->reg.data_type) - { -@@ -8895,8 +8904,8 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst = instruction->dst; - const struct vkd3d_shader_src_param *src = instruction->src; - uint32_t base_coordinate_id, component_idx; -- const struct vkd3d_shader_src_param *data; - struct vkd3d_shader_register_info reg_info; -+ struct vkd3d_shader_src_param data; - unsigned int component_count; - - if (!spirv_compiler_get_register_info(compiler, &dst->reg, ®_info)) -@@ -8908,8 +8917,9 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, - base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - -- data = 
&src[instruction->src_count - 1]; -- val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); -+ data = src[instruction->src_count - 1]; -+ data.reg.data_type = VKD3D_DATA_UINT; -+ val_id = spirv_compiler_emit_load_src(compiler, &data, dst->write_mask); - - component_count = vsir_write_mask_component_count(dst->write_mask); - for (component_idx = 0; component_idx < component_count; ++component_idx) -@@ -9334,6 +9344,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst = instruction->dst; - const struct vkd3d_shader_src_param *src = instruction->src; - uint32_t type_id, lod_id, val_id, miplevel_count_id; -+ enum vkd3d_shader_component_type component_type; - uint32_t constituents[VKD3D_VEC4_SIZE]; - unsigned int i, size_component_count; - struct vkd3d_shader_image image; -@@ -9370,10 +9381,16 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, - val_id = vkd3d_spirv_build_op_composite_construct(builder, - type_id, constituents, i + 2); - -+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); - if (instruction->flags == VKD3DSI_RESINFO_UINT) - { -- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); -+ /* SSA registers must match the specified result type. 
*/ -+ if (!register_is_ssa(&dst->reg)) -+ val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); -+ else -+ component_type = VKD3D_SHADER_COMPONENT_UINT; - } - else - { -@@ -9382,7 +9399,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, - val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); - } - val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, -- VKD3D_SHADER_COMPONENT_FLOAT, src[1].swizzle, dst->write_mask); -+ component_type, src[1].swizzle, dst->write_mask); - - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 4d0658313d5..d5019a5dd63 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -3385,10 +3385,10 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - if (profile->major_version >= 5) - { -- put_u32(&buffer, TAG_RD11); -+ put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); - put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ - put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ -- put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ -+ put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t)); /* size of binding desc */ - put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ - put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ - put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ -@@ -3405,6 +3405,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - const struct extern_resource *resource = &extern_resources[i]; - uint32_t flags = 0; - -+ if (hlsl_version_ge(ctx, 5, 1)) -+ hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); -+ - if (resource->is_user_packed) - flags |= D3D_SIF_USERPACKED; - -@@ -3437,6 +3440,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - if (!cbuffer->reg.allocated) - continue; - -+ if (hlsl_version_ge(ctx, 5, 1)) -+ hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); -+ - if (cbuffer->reservation.reg_type) - flags |= D3D_SIF_USERPACKED; - -@@ -5343,7 +5349,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - &expr->node, arg1, arg2); - break; - -- case HLSL_OP3_MOVC: -+ case HLSL_OP3_TERNARY: - write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); - break; - -@@ -5399,7 +5405,8 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju - - case HLSL_IR_JUMP_DISCARD_NZ: - { -- instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; -+ instr.opcode = VKD3D_SM4_OP_DISCARD; -+ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; - - memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); - instr.src_count = 1; -@@ -5700,19 +5707,13 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc - { - if (instr->data_type) - { -- if (instr->data_type->class == HLSL_CLASS_MATRIX) -- { -- hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); -- break; -- } -- else if (instr->data_type->class == HLSL_CLASS_OBJECT) -+ if (instr->data_type->class != 
HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) - { -- hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); -+ hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.", -+ instr->data_type->class); - break; - } - -- assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); -- - if (!instr->reg.allocated) - { - assert(instr->type == HLSL_IR_CONSTANT); -@@ -5808,13 +5809,21 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -+ { -+ if (hlsl_version_ge(ctx, 5, 1)) -+ hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); -+ - write_sm4_dcl_constant_buffer(&tpf, cbuffer); -+ } - } - - for (i = 0; i < extern_resources_count; ++i) - { - const struct extern_resource *resource = &extern_resources[i]; - -+ if (hlsl_version_ge(ctx, 5, 1)) -+ hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); -+ - if (resource->regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(&tpf, resource); - else if (resource->regset == HLSL_REGSET_TEXTURES) -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 6d442cd517d..9b37bbef70b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1761,6 +1761,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ - return compacted_swizzle; - } - -+static inline uint32_t vsir_swizzle_from_writemask(unsigned int writemask) -+{ -+ static const unsigned int swizzles[16] = -+ { -+ 0, -+ VKD3D_SHADER_SWIZZLE(X, X, X, X), -+ VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), -+ VKD3D_SHADER_SWIZZLE(X, Y, X, X), -+ VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), -+ VKD3D_SHADER_SWIZZLE(X, Z, X, X), -+ VKD3D_SHADER_SWIZZLE(Y, Z, X, X), -+ VKD3D_SHADER_SWIZZLE(X, Y, Z, X), -+ 
VKD3D_SHADER_SWIZZLE(W, W, W, W), -+ VKD3D_SHADER_SWIZZLE(X, W, X, X), -+ VKD3D_SHADER_SWIZZLE(Y, W, X, X), -+ VKD3D_SHADER_SWIZZLE(X, Y, W, X), -+ VKD3D_SHADER_SWIZZLE(Z, W, X, X), -+ VKD3D_SHADER_SWIZZLE(X, Z, W, X), -+ VKD3D_SHADER_SWIZZLE(Y, Z, W, X), -+ VKD3D_SHADER_SWIZZLE(X, Y, Z, W), -+ }; -+ -+ return swizzles[writemask & 0xf]; -+} -+ - struct vkd3d_struct - { - enum vkd3d_shader_structure_type type; -diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c -new file mode 100644 -index 00000000000..56ba6990420 ---- /dev/null -+++ b/libs/vkd3d/libs/vkd3d/cache.c -@@ -0,0 +1,59 @@ -+/* -+ * Copyright 2024 Stefan Dösinger for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#include "vkd3d_private.h" -+ -+struct vkd3d_shader_cache -+{ -+ unsigned int refcount; -+}; -+ -+int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) -+{ -+ struct vkd3d_shader_cache *object; -+ -+ TRACE("%p.\n", cache); -+ -+ object = vkd3d_malloc(sizeof(*object)); -+ if (!object) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ object->refcount = 1; -+ *cache = object; -+ -+ return VKD3D_OK; -+} -+ -+unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache) -+{ -+ unsigned int refcount = vkd3d_atomic_increment_u32(&cache->refcount); -+ TRACE("cache %p refcount %u.\n", cache, refcount); -+ return refcount; -+} -+ -+unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) -+{ -+ unsigned int refcount = vkd3d_atomic_decrement_u32(&cache->refcount); -+ TRACE("cache %p refcount %u.\n", cache, refcount); -+ -+ if (refcount) -+ return refcount; -+ -+ vkd3d_free(cache); -+ return 0; -+} -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 65db8b70bfd..90de27c53b6 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -2529,11 +2529,17 @@ struct d3d12_cache_session - ID3D12ShaderCacheSession ID3D12ShaderCacheSession_iface; - unsigned int refcount; - -+ struct list cache_list_entry; -+ - struct d3d12_device *device; - struct vkd3d_private_store private_store; - D3D12_SHADER_CACHE_SESSION_DESC desc; -+ struct vkd3d_shader_cache *cache; - }; - -+static struct vkd3d_mutex cache_list_mutex = VKD3D_MUTEX_INITIALIZER; -+static struct list cache_list = LIST_INIT(cache_list); -+ - static inline struct d3d12_cache_session *impl_from_ID3D12ShaderCacheSession(ID3D12ShaderCacheSession *iface) - { - return CONTAINING_RECORD(iface, struct 
d3d12_cache_session, ID3D12ShaderCacheSession_iface); -@@ -2582,6 +2588,11 @@ static void d3d12_cache_session_destroy(struct d3d12_cache_session *session) - - TRACE("Destroying cache session %p.\n", session); - -+ vkd3d_mutex_lock(&cache_list_mutex); -+ list_remove(&session->cache_list_entry); -+ vkd3d_mutex_unlock(&cache_list_mutex); -+ -+ vkd3d_shader_cache_decref(session->cache); - vkd3d_private_store_destroy(&session->private_store); - vkd3d_free(session); - -@@ -2707,11 +2718,14 @@ static const struct ID3D12ShaderCacheSessionVtbl d3d12_cache_session_vtbl = - static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session, - struct d3d12_device *device, const D3D12_SHADER_CACHE_SESSION_DESC *desc) - { -+ struct d3d12_cache_session *i; -+ enum vkd3d_result ret; - HRESULT hr; - - session->ID3D12ShaderCacheSession_iface.lpVtbl = &d3d12_cache_session_vtbl; - session->refcount = 1; - session->desc = *desc; -+ session->cache = NULL; - - if (!session->desc.MaximumValueFileSizeBytes) - session->desc.MaximumValueFileSizeBytes = 128 * 1024 * 1024; -@@ -2723,9 +2737,56 @@ static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session, - if (FAILED(hr = vkd3d_private_store_init(&session->private_store))) - return hr; - -+ vkd3d_mutex_lock(&cache_list_mutex); -+ -+ /* We expect the number of open caches to be small. 
*/ -+ LIST_FOR_EACH_ENTRY(i, &cache_list, struct d3d12_cache_session, cache_list_entry) -+ { -+ if (!memcmp(&i->desc.Identifier, &desc->Identifier, sizeof(desc->Identifier))) -+ { -+ TRACE("Found an existing cache %p from session %p.\n", i->cache, i); -+ if (desc->Version == i->desc.Version) -+ { -+ session->desc = i->desc; -+ vkd3d_shader_cache_incref(session->cache = i->cache); -+ break; -+ } -+ else -+ { -+ WARN("version mismatch: Existing %"PRIu64" new %"PRIu64".\n", -+ i->desc.Version, desc->Version); -+ hr = DXGI_ERROR_ALREADY_EXISTS; -+ goto error; -+ } -+ } -+ } -+ -+ if (!session->cache) -+ { -+ if (session->desc.Mode == D3D12_SHADER_CACHE_MODE_DISK) -+ FIXME("Disk caches are not yet implemented.\n"); -+ -+ ret = vkd3d_shader_open_cache(&session->cache); -+ if (ret) -+ { -+ WARN("Failed to open shader cache.\n"); -+ hr = hresult_from_vkd3d_result(ret); -+ goto error; -+ } -+ } -+ -+ /* Add it to the list even if we reused an existing cache. The other session might be destroyed, -+ * but the cache stays alive and can be opened a third time. 
*/ -+ list_add_tail(&cache_list, &session->cache_list_entry); - d3d12_device_add_ref(session->device = device); - -+ vkd3d_mutex_unlock(&cache_list_mutex); - return S_OK; -+ -+error: -+ vkd3d_private_store_destroy(&session->private_store); -+ vkd3d_mutex_unlock(&cache_list_mutex); -+ return hr; - } - - /* ID3D12Device */ -@@ -4874,6 +4935,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateShaderCacheSession(ID3D12Dev - WARN("No output pointer, returning S_FALSE.\n"); - return S_FALSE; - } -+ *session = NULL; - - if (!(object = vkd3d_malloc(sizeof(*object)))) - return E_OUTOFMEMORY; -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index b83a45d0606..179999148bc 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -1893,6 +1893,13 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 - WARN("Invalid sample count 0.\n"); - return E_INVALIDARG; - } -+ if (desc->SampleDesc.Count > 1 -+ && !(desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))) -+ { -+ WARN("Sample count %u invalid without ALLOW_RENDER_TARGET or ALLOW_DEPTH_STENCIL.\n", -+ desc->SampleDesc.Count); -+ return E_INVALIDARG; -+ } - - if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) - { -@@ -1996,6 +2003,11 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 - WARN("Invalid initial resource state %#x.\n", initial_state); - return E_INVALIDARG; - } -+ if (initial_state == D3D12_RESOURCE_STATE_RENDER_TARGET && !(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) -+ { -+ WARN("Invalid initial resource state %#x for non-render-target.\n", initial_state); -+ return E_INVALIDARG; -+ } - - if (optimized_clear_value && d3d12_resource_is_buffer(resource)) - { -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 08cc110e8f7..b8328216a29 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c 
-+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -2045,6 +2045,9 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState - - d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); - -+ if (state->implicit_root_signature) -+ d3d12_root_signature_Release(state->implicit_root_signature); -+ - vkd3d_free(state); - - d3d12_device_release(device); -@@ -2413,8 +2416,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - struct vkd3d_shader_interface_info shader_interface; - struct vkd3d_shader_descriptor_offset_info offset_info; -- const struct d3d12_root_signature *root_signature; - struct vkd3d_shader_spirv_target_info target_info; -+ struct d3d12_root_signature *root_signature; - VkPipelineLayout vk_pipeline_layout; - HRESULT hr; - -@@ -2425,13 +2428,27 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st - - if (!(root_signature = unsafe_impl_from_ID3D12RootSignature(desc->root_signature))) - { -- WARN("Root signature is NULL.\n"); -- return E_INVALIDARG; -+ TRACE("Root signature is NULL, looking for an embedded signature.\n"); -+ if (FAILED(hr = d3d12_root_signature_create(device, -+ desc->cs.pShaderBytecode, desc->cs.BytecodeLength, &root_signature))) -+ { -+ WARN("Failed to find an embedded root signature, hr %s.\n", debugstr_hresult(hr)); -+ return hr; -+ } -+ state->implicit_root_signature = &root_signature->ID3D12RootSignature_iface; -+ } -+ else -+ { -+ state->implicit_root_signature = NULL; - } - - if (FAILED(hr = d3d12_pipeline_state_find_and_init_uav_counters(state, device, root_signature, - &desc->cs, VK_SHADER_STAGE_COMPUTE_BIT))) -+ { -+ if (state->implicit_root_signature) -+ d3d12_root_signature_Release(state->implicit_root_signature); - return hr; -+ } - - memset(&target_info, 0, sizeof(target_info)); - target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; -@@ -2476,6 +2493,8 
@@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st - { - WARN("Failed to create Vulkan compute pipeline, hr %s.\n", debugstr_hresult(hr)); - d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); -+ if (state->implicit_root_signature) -+ d3d12_root_signature_Release(state->implicit_root_signature); - return hr; - } - -@@ -2483,6 +2502,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st - { - VK_CALL(vkDestroyPipeline(device->vk_device, state->u.compute.vk_pipeline, NULL)); - d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); -+ if (state->implicit_root_signature) -+ d3d12_root_signature_Release(state->implicit_root_signature); - return hr; - } - -@@ -3484,6 +3505,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s - goto fail; - - state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; -+ state->implicit_root_signature = NULL; - d3d12_device_add_ref(state->device = device); - - return S_OK; -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index 14c8eb54574..9f5f759667a 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -1213,6 +1213,7 @@ struct d3d12_pipeline_state - - struct d3d12_pipeline_uav_counter_state uav_counters; - -+ ID3D12RootSignature *implicit_root_signature; - struct d3d12_device *device; - - struct vkd3d_private_store private_store; -@@ -1927,4 +1928,10 @@ static inline void vkd3d_prepend_struct(void *header, void *structure) - vkd3d_header->next = vkd3d_structure; - } - -+struct vkd3d_shader_cache; -+ -+int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache); -+unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache); -+unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache); -+ - #endif /* __VKD3D_PRIVATE_H */ --- -2.43.0 - diff --git 
a/patches/vkd3d-latest/0006-Updated-vkd3d-to-e17e481130e095315d57a3d8cc66cc98c4b.patch b/patches/vkd3d-latest/0006-Updated-vkd3d-to-e17e481130e095315d57a3d8cc66cc98c4b.patch deleted file mode 100644 index 4cba7115..00000000 --- a/patches/vkd3d-latest/0006-Updated-vkd3d-to-e17e481130e095315d57a3d8cc66cc98c4b.patch +++ /dev/null @@ -1,1714 +0,0 @@ -From bb098748c521482ac9e4c4c8760a91207ad36000 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 18 Apr 2024 12:17:44 +1000 -Subject: [PATCH] Updated vkd3d to e17e481130e095315d57a3d8cc66cc98c4b3f244. - ---- - libs/vkd3d/include/vkd3d_shader.h | 11 + - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 132 +++-- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 529 +++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 6 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 162 +++++- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 27 +- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 2 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 14 +- - libs/vkd3d/libs/vkd3d/command.c | 20 + - libs/vkd3d/libs/vkd3d/device.c | 22 + - libs/vkd3d/libs/vkd3d/state.c | 6 +- - libs/vkd3d/libs/vkd3d/utils.c | 10 + - libs/vkd3d/libs/vkd3d/vkd3d_main.c | 18 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 3 + - 14 files changed, 866 insertions(+), 96 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 5cc36e186e2..2b32b8a3e98 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -215,6 +215,15 @@ enum vkd3d_shader_compile_option_feature_flags - * This corresponds to the "shaderFloat64" feature in the Vulkan API, and - * the "GL_ARB_gpu_shader_fp64" extension in the OpenGL API. */ - VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLOAT64 = 0x00000002, -+ /** The SPIR-V target environment supports wave operations. 
-+ * This flag is valid only in VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1 -+ * or greater, and corresponds to the following minimum requirements in -+ * VkPhysicalDeviceSubgroupProperties: -+ * - subgroupSize >= 4. -+ * - supportedOperations has BASIC, VOTE, ARITHMETIC, BALLOT, SHUFFLE and -+ * QUAD bits set. -+ * - supportedStages include COMPUTE and FRAGMENT. \since 1.12 */ -+ VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS = 0x00000004, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLAGS), - }; -@@ -923,6 +932,8 @@ enum vkd3d_shader_spirv_environment - VKD3D_SHADER_SPIRV_ENVIRONMENT_NONE, - VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5, - VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0, /* default target */ -+ /** \since 1.12 */ -+ VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_ENVIRONMENT), - }; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 459fdfc9abf..cd8ba0a7d2b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -353,18 +353,6 @@ struct vkd3d_d3d_asm_compiler - const struct vkd3d_shader_instruction *current; - }; - --static int VKD3D_PRINTF_FUNC(2, 3) shader_addline(struct vkd3d_string_buffer *buffer, const char *format, ...) --{ -- va_list args; -- int ret; -- -- va_start(args, format); -- ret = vkd3d_string_buffer_vprintf(buffer, format, args); -- va_end(args); -- -- return ret; --} -- - /* Convert floating point offset relative to a register file to an absolute - * offset for float constants. 
*/ - static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type register_type, UINT register_idx) -@@ -1572,19 +1560,37 @@ static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, - - switch (dst->shift) - { -- case 0: break; -- case 13: shader_addline(buffer, "_d8"); break; -- case 14: shader_addline(buffer, "_d4"); break; -- case 15: shader_addline(buffer, "_d2"); break; -- case 1: shader_addline(buffer, "_x2"); break; -- case 2: shader_addline(buffer, "_x4"); break; -- case 3: shader_addline(buffer, "_x8"); break; -- default: shader_addline(buffer, "_unhandled_shift(%d)", dst->shift); break; -+ case 0: -+ break; -+ case 13: -+ vkd3d_string_buffer_printf(buffer, "_d8"); -+ break; -+ case 14: -+ vkd3d_string_buffer_printf(buffer, "_d4"); -+ break; -+ case 15: -+ vkd3d_string_buffer_printf(buffer, "_d2"); -+ break; -+ case 1: -+ vkd3d_string_buffer_printf(buffer, "_x2"); -+ break; -+ case 2: -+ vkd3d_string_buffer_printf(buffer, "_x4"); -+ break; -+ case 3: -+ vkd3d_string_buffer_printf(buffer, "_x8"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_unhandled_shift(%d)", dst->shift); -+ break; - } - -- if (mmask & VKD3DSPDM_SATURATE) shader_addline(buffer, "_sat"); -- if (mmask & VKD3DSPDM_PARTIALPRECISION) shader_addline(buffer, "_pp"); -- if (mmask & VKD3DSPDM_MSAMPCENTROID) shader_addline(buffer, "_centroid"); -+ if (mmask & VKD3DSPDM_SATURATE) -+ vkd3d_string_buffer_printf(buffer, "_sat"); -+ if (mmask & VKD3DSPDM_PARTIALPRECISION) -+ vkd3d_string_buffer_printf(buffer, "_pp"); -+ if (mmask & VKD3DSPDM_MSAMPCENTROID) -+ vkd3d_string_buffer_printf(buffer, "_centroid"); - - mmask &= ~VKD3DSPDM_MASK; - if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask); -@@ -1730,9 +1736,15 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - case VKD3DSIH_RETP: - switch (ins->flags) - { -- case VKD3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(buffer, "_nz"); break; -- case 
VKD3D_SHADER_CONDITIONAL_OP_Z: shader_addline(buffer, "_z"); break; -- default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); break; -+ case VKD3D_SHADER_CONDITIONAL_OP_NZ: -+ vkd3d_string_buffer_printf(buffer, "_nz"); -+ break; -+ case VKD3D_SHADER_CONDITIONAL_OP_Z: -+ vkd3d_string_buffer_printf(buffer, "_z"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); -+ break; - } - break; - -@@ -1740,32 +1752,58 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - case VKD3DSIH_BREAKC: - switch (ins->flags) - { -- case VKD3D_SHADER_REL_OP_GT: shader_addline(buffer, "_gt"); break; -- case VKD3D_SHADER_REL_OP_EQ: shader_addline(buffer, "_eq"); break; -- case VKD3D_SHADER_REL_OP_GE: shader_addline(buffer, "_ge"); break; -- case VKD3D_SHADER_REL_OP_LT: shader_addline(buffer, "_lt"); break; -- case VKD3D_SHADER_REL_OP_NE: shader_addline(buffer, "_ne"); break; -- case VKD3D_SHADER_REL_OP_LE: shader_addline(buffer, "_le"); break; -- default: shader_addline(buffer, "_(%u)", ins->flags); -+ case VKD3D_SHADER_REL_OP_GT: -+ vkd3d_string_buffer_printf(buffer, "_gt"); -+ break; -+ case VKD3D_SHADER_REL_OP_EQ: -+ vkd3d_string_buffer_printf(buffer, "_eq"); -+ break; -+ case VKD3D_SHADER_REL_OP_GE: -+ vkd3d_string_buffer_printf(buffer, "_ge"); -+ break; -+ case VKD3D_SHADER_REL_OP_LT: -+ vkd3d_string_buffer_printf(buffer, "_lt"); -+ break; -+ case VKD3D_SHADER_REL_OP_NE: -+ vkd3d_string_buffer_printf(buffer, "_ne"); -+ break; -+ case VKD3D_SHADER_REL_OP_LE: -+ vkd3d_string_buffer_printf(buffer, "_le"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_(%u)", ins->flags); -+ break; - } - break; - - case VKD3DSIH_RESINFO: - switch (ins->flags) - { -- case VKD3DSI_NONE: break; -- case VKD3DSI_RESINFO_RCP_FLOAT: shader_addline(buffer, "_rcpFloat"); break; -- case VKD3DSI_RESINFO_UINT: shader_addline(buffer, "_uint"); break; -- default: shader_addline(buffer, "_unrecognized(%#x)", 
ins->flags); -+ case VKD3DSI_NONE: -+ break; -+ case VKD3DSI_RESINFO_RCP_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "_rcpFloat"); -+ break; -+ case VKD3DSI_RESINFO_UINT: -+ vkd3d_string_buffer_printf(buffer, "_uint"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); -+ break; - } - break; - - case VKD3DSIH_SAMPLE_INFO: - switch (ins->flags) - { -- case VKD3DSI_NONE: break; -- case VKD3DSI_SAMPLE_INFO_UINT: shader_addline(buffer, "_uint"); break; -- default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); -+ case VKD3DSI_NONE: -+ break; -+ case VKD3DSI_SAMPLE_INFO_UINT: -+ vkd3d_string_buffer_printf(buffer, "_uint"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); -+ break; - } - break; - -@@ -1788,14 +1826,14 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - - case VKD3DSIH_TEX: - if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) -- shader_addline(buffer, "p"); -+ vkd3d_string_buffer_printf(buffer, "p"); - break; - - case VKD3DSIH_ISHL: - case VKD3DSIH_ISHR: - case VKD3DSIH_USHR: - if (ins->flags & VKD3DSI_SHIFT_UNMASKED) -- shader_addline(buffer, "_unmasked"); -+ vkd3d_string_buffer_printf(buffer, "_unmasked"); - /* fall through */ - default: - shader_dump_precise_flags(compiler, ins->flags); -@@ -1842,7 +1880,7 @@ static void shader_dump_icb(struct vkd3d_d3d_asm_compiler *compiler, - shader_print_hex_literal(compiler, ", ", icb->data[4 * i + 3], "},\n"); - } - } -- shader_addline(buffer, "}"); -+ vkd3d_string_buffer_printf(buffer, "}"); - } - - static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, -@@ -1880,7 +1918,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - shader_print_subscript(compiler, ins->declaration.cb.size, NULL); - else if (vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1)) - 
shader_print_subscript(compiler, ins->declaration.cb.size / VKD3D_VEC4_SIZE / sizeof(float), NULL); -- shader_addline(buffer, ", %s", -+ vkd3d_string_buffer_printf(buffer, ", %s", - ins->flags & VKD3DSI_INDEXED_DYNAMIC ? "dynamicIndexed" : "immediateIndexed"); - shader_dump_register_space(compiler, ins->declaration.cb.range.space); - break; -@@ -2057,7 +2095,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - - if (ins->resource_type != VKD3D_SHADER_RESOURCE_NONE) - { -- shader_addline(buffer, "_indexable("); -+ vkd3d_string_buffer_printf(buffer, "_indexable("); - if (ins->raw) - vkd3d_string_buffer_printf(buffer, "raw_"); - if (ins->structured) -@@ -2065,7 +2103,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - shader_dump_resource_type(compiler, ins->resource_type); - if (ins->resource_stride) - shader_print_uint_literal(compiler, ", stride=", ins->resource_stride, ""); -- shader_addline(buffer, ")"); -+ vkd3d_string_buffer_printf(buffer, ")"); - } - - if (vkd3d_shader_instruction_has_texel_offset(ins)) -@@ -2095,7 +2133,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - break; - } - -- shader_addline(buffer, "\n"); -+ vkd3d_string_buffer_printf(buffer, "\n"); - } - - static const char *get_sysval_semantic_name(enum vkd3d_shader_sysval_semantic semantic) -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index da8ba662dbc..d0a799b100e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -401,6 +401,9 @@ enum dx_intrinsic_opcode - DX_GET_DIMENSIONS = 72, - DX_TEXTURE_GATHER = 73, - DX_TEXTURE_GATHER_CMP = 74, -+ DX_TEX2DMS_GET_SAMPLE_POS = 75, -+ DX_RT_GET_SAMPLE_POS = 76, -+ DX_RT_GET_SAMPLE_COUNT = 77, - DX_ATOMIC_BINOP = 78, - DX_ATOMIC_CMP_XCHG = 79, - DX_BARRIER = 80, -@@ -416,6 +419,7 @@ enum dx_intrinsic_opcode - DX_FLATTENED_THREAD_ID_IN_GROUP = 96, - DX_MAKE_DOUBLE = 101, - 
DX_SPLIT_DOUBLE = 102, -+ DX_PRIMITIVE_ID = 108, - DX_LEGACY_F32TOF16 = 130, - DX_LEGACY_F16TOF32 = 131, - DX_RAW_BUFFER_LOAD = 139, -@@ -791,6 +795,7 @@ struct sm6_parser - size_t global_symbol_count; - - const char *entry_point; -+ const char *patch_constant_function; - - struct vkd3d_shader_dst_param *output_params; - struct vkd3d_shader_dst_param *input_params; -@@ -1933,6 +1938,25 @@ static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type - return NULL; - } - -+static const struct sm6_type *sm6_type_get_cmpxchg_result_struct(struct sm6_parser *sm6) -+{ -+ const struct sm6_type *type; -+ unsigned int i; -+ -+ for (i = 0; i < sm6->type_count; ++i) -+ { -+ type = &sm6->types[i]; -+ if (sm6_type_is_struct(type) && type->u.struc->elem_count == 2 -+ && sm6_type_is_i32(type->u.struc->elem_types[0]) -+ && sm6_type_is_bool(type->u.struc->elem_types[1])) -+ { -+ return type; -+ } -+ } -+ -+ return NULL; -+} -+ - /* Call for aggregate types only. */ - static const struct sm6_type *sm6_type_get_element_type_at_index(const struct sm6_type *type, uint64_t elem_idx) - { -@@ -2574,6 +2598,26 @@ static bool sm6_value_validate_is_texture_handle(const struct sm6_value *value, - return true; - } - -+static bool sm6_value_validate_is_texture_2dms_handle(const struct sm6_value *value, enum dx_intrinsic_opcode op, -+ struct sm6_parser *sm6) -+{ -+ enum dxil_resource_kind kind; -+ -+ if (!sm6_value_validate_is_handle(value, sm6)) -+ return false; -+ -+ kind = value->u.handle.d->kind; -+ if (!resource_kind_is_multisampled(kind)) -+ { -+ WARN("Resource kind %u for op %u is not a 2DMS texture.\n", kind, op); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCE_HANDLE, -+ "Resource kind %u for texture operation %u is not a 2DMS texture.", kind, op); -+ return false; -+ } -+ -+ return true; -+} -+ - static bool sm6_value_validate_is_sampler_handle(const struct sm6_value *value, enum dx_intrinsic_opcode op, - struct sm6_parser *sm6) - { 
-@@ -2643,6 +2687,18 @@ static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, - return true; - } - -+static bool sm6_value_validate_is_i32(const struct sm6_value *value, struct sm6_parser *sm6) -+{ -+ if (!sm6_type_is_i32(value->type)) -+ { -+ WARN("Operand result type %u is not i32.\n", value->type->class); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "An int32 operand passed to a DXIL instruction is not an int32."); -+ return false; -+ } -+ return true; -+} -+ - static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) - { - if (idx < sm6->value_count) -@@ -2790,7 +2846,7 @@ static inline uint64_t decode_rotated_signed_value(uint64_t value) - return value << 63; - } - --static inline float bitcast_uint64_to_float(uint64_t value) -+static float bitcast_uint_to_float(unsigned int value) - { - union - { -@@ -2814,6 +2870,23 @@ static inline double bitcast_uint64_to_double(uint64_t value) - return u.double_value; - } - -+static float register_get_float_value(const struct vkd3d_shader_register *reg) -+{ -+ if (!register_is_constant(reg) || !data_type_is_floating_point(reg->data_type)) -+ return 0.0; -+ -+ if (reg->dimension == VSIR_DIMENSION_VEC4) -+ WARN("Returning vec4.x.\n"); -+ -+ if (reg->type == VKD3DSPR_IMMCONST64) -+ { -+ WARN("Truncating double to float.\n"); -+ return bitcast_uint64_to_double(reg->u.immconst_u64[0]); -+ } -+ -+ return bitcast_uint_to_float(reg->u.immconst_u32[0]); -+} -+ - static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, const struct sm6_type *type, - const uint64_t *operands, struct sm6_parser *sm6) - { -@@ -3098,7 +3171,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - if (type->u.width == 16) - dst->u.reg.u.immconst_u32[0] = record->operands[0]; - else if (type->u.width == 32) -- dst->u.reg.u.immconst_f32[0] = bitcast_uint64_to_float(record->operands[0]); -+ 
dst->u.reg.u.immconst_f32[0] = bitcast_uint_to_float(record->operands[0]); - else if (type->u.width == 64) - dst->u.reg.u.immconst_f64[0] = bitcast_uint64_to_double(record->operands[0]); - else -@@ -4567,6 +4640,22 @@ static void sm6_parser_dcl_register_builtin(struct sm6_parser *sm6, - } - } - -+static void sm6_parser_emit_dx_input_register_mov(struct sm6_parser *sm6, -+ struct vkd3d_shader_instruction *ins, enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type) -+{ -+ struct vkd3d_shader_src_param *src_param; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ sm6_parser_dcl_register_builtin(sm6, reg_type, data_type, 1); -+ vsir_register_init(&src_param->reg, reg_type, data_type, 0); -+ src_param_init(src_param); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static const struct sm6_descriptor_info *sm6_parser_get_descriptor(struct sm6_parser *sm6, - enum vkd3d_shader_descriptor_type type, unsigned int id, const struct sm6_value *address) - { -@@ -4928,6 +5017,12 @@ static void sm6_parser_emit_dx_make_double(struct sm6_parser *sm6, enum dx_intri - instruction_dst_param_init_ssa_scalar(ins, sm6); - } - -+static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_PRIMID, VKD3D_DATA_UINT); -+} -+ - static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -5150,6 +5245,59 @@ static void sm6_parser_emit_dx_buffer_store(struct sm6_parser *sm6, enum dx_intr - dst_param->reg = resource->u.handle.reg; - } - -+static void sm6_parser_emit_dx_get_sample_count(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value 
**operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SAMPLE_INFO); -+ ins->flags = VKD3DSI_SAMPLE_INFO_UINT; -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ vsir_register_init(&src_param->reg, VKD3DSPR_RASTERIZER, VKD3D_DATA_FLOAT, 0); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ src_param_init(src_param); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+ ins->dst->reg.data_type = VKD3D_DATA_FLOAT; -+} -+ -+static void sm6_parser_emit_dx_get_sample_pos(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_params; -+ const struct sm6_value *resource = NULL; -+ -+ if (op == DX_TEX2DMS_GET_SAMPLE_POS) -+ { -+ resource = operands[0]; -+ if (!sm6_value_validate_is_texture_2dms_handle(resource, op, sm6)) -+ return; -+ } -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SAMPLE_POS); -+ -+ if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) -+ return; -+ if (op == DX_TEX2DMS_GET_SAMPLE_POS) -+ { -+ src_param_init_vector_from_reg(&src_params[0], &resource->u.handle.reg); -+ src_param_init_from_value(&src_params[1], operands[1]); -+ } -+ else -+ { -+ src_param_init_vector(&src_params[0], 2); -+ vsir_register_init(&src_params[0].reg, VKD3DSPR_RASTERIZER, VKD3D_DATA_FLOAT, 0); -+ src_params[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ src_param_init_from_value(&src_params[1], operands[0]); -+ } -+ -+ instruction_dst_param_init_ssa_vector(ins, 2, sm6); -+} -+ - static unsigned int sm6_value_get_texel_offset(const struct sm6_value *value) - { - return sm6_value_is_undef(value) ? 
0 : sm6_value_get_constant_uint(value); -@@ -5590,6 +5738,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_LOAD_INPUT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, - [DX_LOG ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, -+ [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, - [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, - [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, - [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, -@@ -5597,6 +5746,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_ROUND_PI ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_ROUND_Z ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_RSQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, -+ [DX_RT_GET_SAMPLE_COUNT ] = {"i", "", sm6_parser_emit_dx_get_sample_count}, -+ [DX_RT_GET_SAMPLE_POS ] = {"o", "i", sm6_parser_emit_dx_get_sample_pos}, - [DX_SAMPLE ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_B ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_C ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample}, -@@ -5609,6 +5760,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_SQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_STORE_OUTPUT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, - [DX_TAN ] = {"g", "R", sm6_parser_emit_dx_unary}, -+ [DX_TEX2DMS_GET_SAMPLE_POS ] = {"o", "Hi", sm6_parser_emit_dx_get_sample_pos}, - [DX_TEXTURE_GATHER ] = {"o", "HHffffiic", sm6_parser_emit_dx_texture_gather}, - [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", sm6_parser_emit_dx_texture_gather}, - [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, -@@ -6130,6 +6282,87 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor - instruction_dst_param_init_ssa_scalar(ins, sm6); - } - -+static void sm6_parser_emit_cmpxchg(struct sm6_parser 
*sm6, const struct dxil_record *record, -+ struct vkd3d_shader_instruction *ins, struct sm6_value *dst) -+{ -+ uint64_t success_ordering, failure_ordering; -+ struct vkd3d_shader_dst_param *dst_params; -+ struct vkd3d_shader_src_param *src_params; -+ const struct sm6_value *ptr, *cmp, *new; -+ const struct sm6_type *type; -+ unsigned int i = 0; -+ bool is_volatile; -+ uint64_t code; -+ -+ if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) -+ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) -+ return; -+ -+ if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) -+ { -+ WARN("Register is not groupshared.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "The destination register for a cmpxchg instruction is not groupshared memory."); -+ return; -+ } -+ -+ if (!(dst->type = sm6_type_get_cmpxchg_result_struct(sm6))) -+ { -+ WARN("Failed to find result struct.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "Module does not define a result struct type for a cmpxchg instruction."); -+ return; -+ } -+ -+ type = ptr->type->u.pointer.type; -+ cmp = sm6_parser_get_value_by_ref(sm6, record, type, &i); -+ new = sm6_parser_get_value_by_ref(sm6, record, type, &i); -+ if (!cmp || !new) -+ return; -+ -+ if (!sm6_value_validate_is_i32(cmp, sm6) -+ || !sm6_value_validate_is_i32(new, sm6) -+ || !dxil_record_validate_operand_count(record, i + 3, i + 5, sm6)) -+ { -+ return; -+ } -+ -+ is_volatile = record->operands[i++]; -+ success_ordering = record->operands[i++]; -+ -+ if ((code = record->operands[i++]) != 1) -+ FIXME("Ignoring synchronisation scope %"PRIu64".\n", code); -+ -+ failure_ordering = (record->operand_count > i) ? record->operands[i++] : success_ordering; -+ -+ /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. 
*/ -+ if (success_ordering != ORDERING_SEQCST) -+ FIXME("Unhandled success ordering %"PRIu64".\n", success_ordering); -+ if (success_ordering != failure_ordering) -+ FIXME("Unhandled failure ordering %"PRIu64".\n", failure_ordering); -+ -+ if (record->operand_count > i && record->operands[i]) -+ FIXME("Ignoring weak cmpxchg.\n"); -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_IMM_ATOMIC_CMP_EXCH); -+ ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; -+ -+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -+ return; -+ src_param_make_constant_uint(&src_params[0], 0); -+ src_param_init_from_value(&src_params[1], cmp); -+ src_param_init_from_value(&src_params[2], new); -+ -+ if (!(dst_params = instruction_dst_params_alloc(ins, 2, sm6))) -+ return; -+ register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); -+ dst_param_init(&dst_params[0]); -+ dst_params[1].reg = ptr->u.reg; -+ dst_param_init(&dst_params[1]); -+ -+ dst->u.reg = dst_params[0].reg; -+} -+ - static void sm6_parser_emit_extractval(struct sm6_parser *sm6, const struct dxil_record *record, - struct vkd3d_shader_instruction *ins, struct sm6_value *dst) - { -@@ -6728,6 +6961,25 @@ static bool sm6_metadata_get_uint64_value(const struct sm6_parser *sm6, - return true; - } - -+static bool sm6_metadata_get_float_value(const struct sm6_parser *sm6, -+ const struct sm6_metadata_value *m, float *f) -+{ -+ const struct sm6_value *value; -+ -+ if (!m || m->type != VKD3D_METADATA_VALUE) -+ return false; -+ -+ value = m->u.value; -+ if (!sm6_value_is_constant(value)) -+ return false; -+ if (!sm6_type_is_floating_point(value->type)) -+ return false; -+ -+ *f = register_get_float_value(&value->u.reg); -+ -+ return true; -+} -+ - static void sm6_parser_metadata_attachment_block_init(struct sm6_parser *sm6, const struct dxil_block *target_block, - const struct dxil_block *block) - { -@@ -7115,6 +7367,9 @@ static enum vkd3d_result 
sm6_parser_function_init(struct sm6_parser *sm6, const - case FUNC_CODE_INST_CMP2: - sm6_parser_emit_cmp2(sm6, record, ins, dst); - break; -+ case FUNC_CODE_INST_CMPXCHG: -+ sm6_parser_emit_cmpxchg(sm6, record, ins, dst); -+ break; - case FUNC_CODE_INST_EXTRACTVAL: - sm6_parser_emit_extractval(sm6, record, ins, dst); - break; -@@ -7685,9 +7940,40 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = - [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, - }; - --static enum vkd3d_shader_sysval_semantic sysval_semantic_from_dxil_semantic_kind(enum dxil_semantic_kind kind) -+static enum vkd3d_shader_sysval_semantic sysval_semantic_from_dxil_semantic_kind(enum dxil_semantic_kind kind, -+ enum vkd3d_tessellator_domain domain) - { -- if (kind < ARRAY_SIZE(sysval_semantic_table)) -+ if (kind == SEMANTIC_KIND_TESSFACTOR) -+ { -+ switch (domain) -+ { -+ case VKD3D_TESSELLATOR_DOMAIN_LINE: -+ return VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; -+ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -+ return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; -+ case VKD3D_TESSELLATOR_DOMAIN_QUAD: -+ return VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE; -+ default: -+ /* Error is handled during parsing. */ -+ return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; -+ } -+ } -+ else if (kind == SEMANTIC_KIND_INSIDETESSFACTOR) -+ { -+ switch (domain) -+ { -+ case VKD3D_TESSELLATOR_DOMAIN_LINE: -+ return VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; -+ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -+ return VKD3D_SHADER_SV_TESS_FACTOR_TRIINT; -+ case VKD3D_TESSELLATOR_DOMAIN_QUAD: -+ return VKD3D_SHADER_SV_TESS_FACTOR_QUADINT; -+ default: -+ /* Error is handled during parsing. 
*/ -+ return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; -+ } -+ } -+ else if (kind < ARRAY_SIZE(sysval_semantic_table)) - { - return sysval_semantic_table[kind]; - } -@@ -8443,7 +8729,7 @@ static void signature_element_read_additional_element_values(struct signature_el - } - - static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, -- struct shader_signature *s) -+ struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain) - { - unsigned int i, j, column_count, operand_count, index; - const struct sm6_metadata_node *node, *element_node; -@@ -8536,7 +8822,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - e->min_precision = minimum_precision_from_dxil_component_type(values[2]); - - j = values[3]; -- e->sysval_semantic = sysval_semantic_from_dxil_semantic_kind(j); -+ e->sysval_semantic = sysval_semantic_from_dxil_semantic_kind(j, tessellator_domain); - if (j != SEMANTIC_KIND_ARBITRARY && j != SEMANTIC_KIND_TARGET && e->sysval_semantic == VKD3D_SHADER_SV_NONE) - { - WARN("Unhandled semantic kind %u.\n", j); -@@ -8619,7 +8905,8 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - return VKD3D_OK; - } - --static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m) -+static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m, -+ enum vkd3d_tessellator_domain tessellator_domain) - { - enum vkd3d_result ret; - -@@ -8632,12 +8919,12 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons - } - - if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], -- &sm6->p.program.input_signature)) < 0) -+ &sm6->p.program.input_signature, tessellator_domain)) < 0) - { - return ret; - } - if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], 
-- &sm6->p.program.output_signature)) < 0) -+ &sm6->p.program.output_signature, tessellator_domain)) < 0) - { - return ret; - } -@@ -8730,10 +9017,216 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co - return VKD3D_OK; - } - -+static void sm6_parser_emit_dcl_count(struct sm6_parser *sm6, enum vkd3d_shader_opcode handler_idx, unsigned int count) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ ins = sm6_parser_add_instruction(sm6, handler_idx); -+ ins->declaration.count = count; -+} -+ -+static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, -+ enum vkd3d_tessellator_domain tessellator_domain) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ if (tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID || tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) -+ { -+ WARN("Unhandled domain %u.\n", tessellator_domain); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Domain shader tessellator domain %u is unhandled.", tessellator_domain); -+ } -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); -+ ins->declaration.tessellator_domain = tessellator_domain; -+} -+ -+static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, -+ const char *type) -+{ -+ if (!count || count > 32) -+ { -+ WARN("%s control point count %u invalid.\n", type, count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "%s control point count %u is invalid.", type, count); -+ } -+} -+ -+static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, -+ enum vkd3d_shader_tessellator_partitioning tessellator_partitioning) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!tessellator_partitioning || tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) -+ { -+ WARN("Unhandled partitioning %u.\n", tessellator_partitioning); -+ vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader tessellator partitioning %u is unhandled.", tessellator_partitioning); -+ } -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING); -+ ins->declaration.tessellator_partitioning = tessellator_partitioning; -+} -+ -+static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *sm6, -+ enum vkd3d_shader_tessellator_output_primitive primitive) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!primitive || primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) -+ { -+ WARN("Unhandled output primitive %u.\n", primitive); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader tessellator output primitive %u is unhandled.", primitive); -+ } -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE); -+ ins->declaration.tessellator_output_primitive = primitive; -+} -+ -+static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, struct sm6_metadata_value *m) -+{ -+ struct vkd3d_shader_instruction *ins; -+ float max_tessellation_factor; -+ -+ if (!sm6_metadata_get_float_value(sm6, m, &max_tessellation_factor)) -+ { -+ WARN("Max tess factor property is not a float value.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader max tessellation factor property operand is not a float."); -+ return; -+ } -+ -+ /* Exclude non-finite values. 
*/ -+ if (!(max_tessellation_factor >= 1.0f && max_tessellation_factor <= 64.0f)) -+ { -+ WARN("Invalid max tess factor %f.\n", max_tessellation_factor); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader max tessellation factor %f is invalid.", max_tessellation_factor); -+ } -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_HS_MAX_TESSFACTOR); -+ ins->declaration.max_tessellation_factor = max_tessellation_factor; -+} -+ -+static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_parser *sm6, -+ const struct sm6_metadata_value *m) -+{ -+ const struct sm6_metadata_node *node; -+ unsigned int operands[2] = {0}; -+ unsigned int i; -+ -+ if (!m || !sm6_metadata_value_is_node(m)) -+ { -+ WARN("Missing or invalid DS properties.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Domain shader properties node is missing or invalid."); -+ return 0; -+ } -+ -+ node = m->u.node; -+ if (node->operand_count < ARRAY_SIZE(operands)) -+ { -+ WARN("Invalid operand count %u.\n", node->operand_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "Domain shader properties operand count %u is invalid.", node->operand_count); -+ return 0; -+ } -+ if (node->operand_count > ARRAY_SIZE(operands)) -+ { -+ WARN("Ignoring %zu extra operands.\n", node->operand_count - ARRAY_SIZE(operands)); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %zu extra operands for domain shader properties.", -+ node->operand_count - ARRAY_SIZE(operands)); -+ } -+ -+ for (i = 0; i < node->operand_count; ++i) -+ { -+ if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) -+ { -+ WARN("DS property at index %u is not a uint value.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Domain shader properties operand at index %u is not an integer.", i); -+ } 
-+ } -+ -+ sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); -+ sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); -+ sm6->p.program.input_control_point_count = operands[1]; -+ -+ return operands[0]; -+} -+ -+static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_parser *sm6, -+ const struct sm6_metadata_value *m) -+{ -+ const struct sm6_metadata_node *node; -+ unsigned int operands[6] = {0}; -+ unsigned int i; -+ -+ if (!m || !sm6_metadata_value_is_node(m)) -+ { -+ WARN("Missing or invalid HS properties.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader properties node is missing or invalid."); -+ return 0; -+ } -+ -+ node = m->u.node; -+ if (node->operand_count < 7) -+ { -+ WARN("Invalid operand count %u.\n", node->operand_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "Hull shader properties operand count %u is invalid.", node->operand_count); -+ return 0; -+ } -+ if (node->operand_count > 7) -+ { -+ WARN("Ignoring %u extra operands.\n", node->operand_count - 7); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %u extra operands for hull shader properties.", node->operand_count - 7); -+ } -+ -+ m = node->operands[0]; -+ if (!sm6_metadata_value_is_value(m) || !sm6_value_is_function_dcl(m->u.value)) -+ { -+ WARN("Patch constant function node is not a function value.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader patch constant function node is not a function value."); -+ } -+ else -+ { -+ sm6->patch_constant_function = m->u.value->u.function.name; -+ } -+ -+ for (i = 1; i < min(node->operand_count, ARRAY_SIZE(operands)); ++i) -+ { -+ if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) -+ { -+ WARN("HS property at index %u is not a uint value.\n", i); -+ 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader properties operand at index %u is not an integer.", i); -+ } -+ } -+ -+ sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); -+ sm6->p.program.input_control_point_count = operands[1]; -+ sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); -+ sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); -+ sm6->p.program.output_control_point_count = operands[2]; -+ sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); -+ sm6_parser_emit_dcl_tessellator_partitioning(sm6, operands[4]); -+ sm6_parser_emit_dcl_tessellator_output_primitive(sm6, operands[5]); -+ sm6_parser_emit_dcl_max_tessellation_factor(sm6, node->operands[6]); -+ -+ return operands[3]; -+} -+ - static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) - { - const struct sm6_metadata_value *m = sm6_parser_find_named_metadata(sm6, "dx.entryPoints"); - const struct sm6_metadata_node *node, *entry_node = m ? 
m->u.node : NULL; -+ enum vkd3d_tessellator_domain tessellator_domain = 0; - unsigned int i, operand_count, tag; - const struct sm6_value *value; - enum vkd3d_result ret; -@@ -8772,12 +9265,6 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) - "Entry point function name %s does not match the name in metadata.", sm6->entry_point); - } - -- if (entry_node->operand_count >= 3 && (m = entry_node->operands[2]) -- && (ret = sm6_parser_signatures_init(sm6, m)) < 0) -- { -- return ret; -- } -- - if (entry_node->operand_count >= 5 && (m = entry_node->operands[4])) - { - if (!sm6_metadata_value_is_node(m)) -@@ -8812,6 +9299,12 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) - case SHADER_PROPERTIES_FLAGS: - sm6_parser_emit_global_flags(sm6, node->operands[i + 1]); - break; -+ case SHADER_PROPERTIES_DOMAIN: -+ tessellator_domain = sm6_parser_ds_properties_init(sm6, node->operands[i + 1]); -+ break; -+ case SHADER_PROPERTIES_HULL: -+ tessellator_domain = sm6_parser_hs_properties_init(sm6, node->operands[i + 1]); -+ break; - case SHADER_PROPERTIES_COMPUTE: - if ((ret = sm6_parser_emit_thread_group(sm6, node->operands[i + 1])) < 0) - return ret; -@@ -8825,6 +9318,12 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) - } - } - -+ if (entry_node->operand_count >= 3 && (m = entry_node->operands[2]) -+ && (ret = sm6_parser_signatures_init(sm6, m, tessellator_domain)) < 0) -+ { -+ return ret; -+ } -+ - return VKD3D_OK; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 3977852a48d..f1012d06c6a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -139,12 +139,16 @@ static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, - gen->message_context = message_context; - } - --int glsl_compile(struct vsir_program *program, struct vkd3d_shader_code *out, -+int glsl_compile(struct vsir_program 
*program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_glsl_generator generator; - int ret; - -+ if ((ret = vsir_program_normalise(program, config_flags, compile_info, message_context)) < 0) -+ return ret; -+ - vkd3d_glsl_generator_init(&generator, program, message_context); - ret = vkd3d_glsl_generator_generate(&generator, out); - vkd3d_glsl_generator_cleanup(&generator); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 610d907d981..eca18f4eb28 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -232,6 +232,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - return ret; - break; - -+ case VKD3DSIH_DCL_CONSTANT_BUFFER: - case VKD3DSIH_DCL_TEMPS: - vkd3d_shader_instruction_make_nop(ins); - break; -@@ -2986,6 +2987,8 @@ struct vsir_cfg_structure - { - struct vsir_cfg_structure_list body; - unsigned idx; -+ bool needs_trampoline; -+ struct vsir_cfg_structure *outer_loop; - } loop; - struct vsir_cfg_structure_selection - { -@@ -3008,6 +3011,7 @@ struct vsir_cfg_structure - unsigned int target; - struct vkd3d_shader_src_param *condition; - bool invert_condition; -+ bool needs_launcher; - } jump; - } u; - }; -@@ -3257,7 +3261,8 @@ static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_struct - - vsir_cfg_structure_list_dump(cfg, &structure->u.loop.body); - -- TRACE("%s} # %u\n", cfg->debug_buffer.buffer, structure->u.loop.idx); -+ TRACE("%s} # %u%s\n", cfg->debug_buffer.buffer, structure->u.loop.idx, -+ structure->u.loop.needs_trampoline ? 
", tramp" : ""); - break; - - case STRUCTURE_TYPE_SELECTION: -@@ -3301,8 +3306,9 @@ static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_struct - vkd3d_unreachable(); - } - -- TRACE("%s%s%s %u\n", cfg->debug_buffer.buffer, type_str, -- structure->u.jump.condition ? "c" : "", structure->u.jump.target); -+ TRACE("%s%s%s %u%s\n", cfg->debug_buffer.buffer, type_str, -+ structure->u.jump.condition ? "c" : "", structure->u.jump.target, -+ structure->u.jump.needs_launcher ? " # launch" : ""); - break; - } - -@@ -4268,6 +4274,17 @@ static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg - --cfg->loop_intervals[else_target].target_count; - } - -+ /* If a branch becomes empty, make it the else branch, so we save a block. */ -+ if (selection->u.selection.if_body.count == 0) -+ { -+ struct vsir_cfg_structure_list tmp; -+ -+ selection->u.selection.invert_condition = !selection->u.selection.invert_condition; -+ tmp = selection->u.selection.if_body; -+ selection->u.selection.if_body = selection->u.selection.else_body; -+ selection->u.selection.else_body = tmp; -+ } -+ - return VKD3D_OK; - } - -@@ -4492,6 +4509,90 @@ static void vsir_cfg_count_targets(struct vsir_cfg *cfg, struct vsir_cfg_structu - } - } - -+/* Trampolines are code gadgets used to emulate multilevel jumps (which are not natively supported -+ * by SPIR-V). A trampoline is inserted just after a loop and checks whether control has reached the -+ * intended site (i.e., we just jumped out of the target block) or if other levels of jumping are -+ * needed. For each jump a trampoline is required for all the loops between the jump itself and the -+ * target loop, excluding the target loop itself. 
*/ -+static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, -+ struct vsir_cfg_structure *loop) -+{ -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i]; -+ -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_BLOCK: -+ break; -+ -+ case STRUCTURE_TYPE_LOOP: -+ structure->u.loop.outer_loop = loop; -+ vsir_cfg_mark_trampolines(cfg, &structure->u.loop.body, structure); -+ break; -+ -+ case STRUCTURE_TYPE_SELECTION: -+ vsir_cfg_mark_trampolines(cfg, &structure->u.selection.if_body, loop); -+ vsir_cfg_mark_trampolines(cfg, &structure->u.selection.else_body, loop); -+ break; -+ -+ case STRUCTURE_TYPE_JUMP: -+ { -+ struct vsir_cfg_structure *l; -+ if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) -+ break; -+ for (l = loop; l && l->u.loop.idx != structure->u.jump.target; l = l->u.loop.outer_loop) -+ { -+ assert(l->type == STRUCTURE_TYPE_LOOP); -+ l->u.loop.needs_trampoline = true; -+ } -+ break; -+ } -+ } -+ } -+} -+ -+/* Launchers are the counterpart of trampolines. A launcher is inserted just before a jump, and -+ * writes in a well-known variable what is the target of the jump. Trampolines will then read that -+ * variable to decide how to redirect the jump to its intended target. A launcher is needed each -+ * time the innermost loop containing the jump itself has a trampoline (independently of whether the -+ * jump is targeting that loop or not). 
*/ -+static void vsir_cfg_mark_launchers(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, -+ struct vsir_cfg_structure *loop) -+{ -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i]; -+ -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_BLOCK: -+ break; -+ -+ case STRUCTURE_TYPE_LOOP: -+ vsir_cfg_mark_launchers(cfg, &structure->u.loop.body, structure); -+ break; -+ -+ case STRUCTURE_TYPE_SELECTION: -+ vsir_cfg_mark_launchers(cfg, &structure->u.selection.if_body, loop); -+ vsir_cfg_mark_launchers(cfg, &structure->u.selection.else_body, loop); -+ break; -+ -+ case STRUCTURE_TYPE_JUMP: -+ if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) -+ break; -+ assert(loop && loop->type == STRUCTURE_TYPE_LOOP); -+ if (loop->u.loop.needs_trampoline) -+ structure->u.jump.needs_launcher = true; -+ break; -+ } -+ } -+} -+ - static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) - { - enum vkd3d_result ret; -@@ -4500,6 +4601,14 @@ static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) - - ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program); - -+ /* Trampolines and launchers cannot be marked with the same pass, -+ * because a jump might have to be marked as launcher even when it -+ * targets its innermost loop, if other jumps in the same loop -+ * need a trampoline anyway. So launchers can be discovered only -+ * once all the trampolines are known. */ -+ vsir_cfg_mark_trampolines(cfg, &cfg->structured_program, NULL); -+ vsir_cfg_mark_launchers(cfg, &cfg->structured_program, NULL); -+ - if (TRACE_ON()) - vsir_cfg_dump_structured_program(cfg); - -@@ -4548,7 +4657,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg, - - /* Add a trampoline to implement multilevel jumping depending on the stored - * jump_target value. 
*/ -- if (loop_idx != UINT_MAX) -+ if (loop->needs_trampoline) - { - /* If the multilevel jump is a `continue' and the target is the loop we're inside - * right now, then we can finally do the `continue'. */ -@@ -4685,7 +4794,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, - if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2)) - return VKD3D_ERROR_OUT_OF_MEMORY; - -- if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) -+ if (jump->needs_launcher) - { - if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], - &no_loc, VKD3DSIH_MOV, 1, 1)) -@@ -5433,6 +5542,46 @@ static void vsir_validate_instruction(struct validation_context *ctx) - ctx->dcl_temps_found = false; - return; - -+ case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: -+ /* Exclude non-finite values. */ -+ if (!(instruction->declaration.max_tessellation_factor >= 1.0f -+ && instruction->declaration.max_tessellation_factor <= 64.0f)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Max tessellation factor %f is invalid.", -+ instruction->declaration.max_tessellation_factor); -+ return; -+ -+ /* The DXIL parser can generate these outside phases, but this is not an issue. 
*/ -+ case VKD3DSIH_DCL_INPUT: -+ case VKD3DSIH_DCL_OUTPUT: -+ return; -+ -+ case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: -+ if (!instruction->declaration.count || instruction->declaration.count > 32) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.", -+ instruction->declaration.count); -+ return; -+ -+ case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: -+ if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID -+ || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); -+ return; -+ -+ case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: -+ if (!instruction->declaration.tessellator_output_primitive -+ || instruction->declaration.tessellator_output_primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Tessellator output primitive %#x is invalid.", instruction->declaration.tessellator_output_primitive); -+ return; -+ -+ case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: -+ if (!instruction->declaration.tessellator_partitioning -+ || instruction->declaration.tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Tessellator partitioning %#x is invalid.", instruction->declaration.tessellator_partitioning); -+ return; -+ - default: - break; - } -@@ -5863,7 +6012,8 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t - if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) - return result; - -- if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) -+ if (compile_info->target_type != VKD3D_SHADER_TARGET_GLSL -+ && (result = 
vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) - return result; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index dfab1cb229b..c4e712b8471 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -45,6 +45,8 @@ static spv_target_env spv_target_env_from_vkd3d(enum vkd3d_shader_spirv_environm - return SPV_ENV_OPENGL_4_5; - case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: - return SPV_ENV_VULKAN_1_0; -+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: -+ return SPV_ENV_VULKAN_1_1; - default: - ERR("Invalid environment %#x.\n", environment); - return SPV_ENV_VULKAN_1_0; -@@ -223,12 +225,8 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d - } - } - --static bool data_type_is_floating_point(enum vkd3d_data_type data_type) --{ -- return data_type == VKD3D_DATA_HALF || data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; --} -- --#define VKD3D_SPIRV_VERSION 0x00010000 -+#define VKD3D_SPIRV_VERSION_1_0 0x00010000 -+#define VKD3D_SPIRV_VERSION_1_3 0x00010300 - #define VKD3D_SPIRV_GENERATOR_ID 18 - #define VKD3D_SPIRV_GENERATOR_VERSION 11 - #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) -@@ -1920,7 +1918,7 @@ static void vkd3d_spirv_builder_free(struct vkd3d_spirv_builder *builder) - } - - static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, -- struct vkd3d_shader_code *spirv, const char *entry_point) -+ struct vkd3d_shader_code *spirv, const char *entry_point, enum vkd3d_shader_spirv_environment environment) - { - uint64_t capability_mask = builder->capability_mask; - struct vkd3d_spirv_stream stream; -@@ -1931,7 +1929,8 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, - vkd3d_spirv_stream_init(&stream); - - vkd3d_spirv_build_word(&stream, SpvMagicNumber); -- vkd3d_spirv_build_word(&stream, 
VKD3D_SPIRV_VERSION); -+ vkd3d_spirv_build_word(&stream, (environment == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1) -+ ? VKD3D_SPIRV_VERSION_1_3 : VKD3D_SPIRV_VERSION_1_0); - vkd3d_spirv_build_word(&stream, VKD3D_SPIRV_GENERATOR_MAGIC); - vkd3d_spirv_build_word(&stream, builder->current_id); /* bound */ - vkd3d_spirv_build_word(&stream, 0); /* schema, reserved */ -@@ -2480,6 +2479,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p - { - case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: - case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: -+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: - break; - default: - WARN("Invalid target environment %#x.\n", target_info->environment); -@@ -9263,7 +9263,11 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type); - - if (instruction->flags & VKD3DARF_VOLATILE) -+ { - WARN("Ignoring 'volatile' attribute.\n"); -+ spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG, -+ "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->handler_idx); -+ } - - memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) - ? 
SpvMemorySemanticsSequentiallyConsistentMask -@@ -10052,7 +10056,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - spirv_compiler_emit_cut_stream(compiler, instruction); - break; - case VKD3DSIH_DCL: -- case VKD3DSIH_DCL_CONSTANT_BUFFER: - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: - case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: - case VKD3DSIH_DCL_INPUT_SGV: -@@ -10157,6 +10160,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - struct vkd3d_shader_instruction_array instructions; - struct vsir_program *program = &parser->program; -+ enum vkd3d_shader_spirv_environment environment; - enum vkd3d_result result = VKD3D_OK; - unsigned int i; - -@@ -10241,12 +10245,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - if (compiler->strip_debug) - vkd3d_spirv_stream_clear(&builder->debug_stream); - -- if (!vkd3d_spirv_compile_module(builder, spirv, spirv_compiler_get_entry_point_name(compiler))) -+ environment = spirv_compiler_get_target_environment(compiler); -+ if (!vkd3d_spirv_compile_module(builder, spirv, spirv_compiler_get_entry_point_name(compiler), environment)) - return VKD3D_ERROR; - - if (TRACE_ON() || parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) - { -- enum vkd3d_shader_spirv_environment environment = spirv_compiler_get_target_environment(compiler); - struct vkd3d_string_buffer buffer; - - if (TRACE_ON()) -@@ -10274,7 +10278,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT) - { - struct vkd3d_shader_code text; -- enum vkd3d_shader_spirv_environment environment = spirv_compiler_get_target_environment(compiler); - if (vkd3d_spirv_binary_to_text(spirv, environment, compiler->formatting, &text) != VKD3D_OK) - return VKD3D_ERROR; - vkd3d_shader_free_shader_code(spirv); -diff --git 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 81ac84896d4..3cc32ced280 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -1576,7 +1576,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - case VKD3D_SHADER_TARGET_GLSL: - if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) - return ret; -- ret = glsl_compile(program, out, message_context); -+ ret = glsl_compile(program, parser->config_flags, compile_info, out, message_context); - vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - break; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 9b37bbef70b..36eb903ed84 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -100,6 +100,7 @@ enum vkd3d_shader_error - - VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300, - VKD3D_SHADER_WARNING_SPV_INVALID_UAV_FLAGS = 2301, -+ VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG = 2302, - - VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, - VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, -@@ -220,6 +221,7 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX = 9015, - VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW = 9016, - VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, -+ VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION = 9018, - - VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, - }; -@@ -641,6 +643,11 @@ static inline bool data_type_is_bool(enum vkd3d_data_type data_type) - return data_type == VKD3D_DATA_BOOL; - } - -+static inline bool data_type_is_floating_point(enum vkd3d_data_type data_type) -+{ -+ return data_type == VKD3D_DATA_HALF || data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; -+} -+ - static inline bool data_type_is_64_bit(enum 
vkd3d_data_type data_type) - { - return data_type == VKD3D_DATA_DOUBLE || data_type == VKD3D_DATA_UINT64; -@@ -763,9 +770,13 @@ enum vkd3d_shader_atomic_rmw_flags - - enum vkd3d_tessellator_domain - { -+ VKD3D_TESSELLATOR_DOMAIN_INVALID = 0, -+ - VKD3D_TESSELLATOR_DOMAIN_LINE = 1, - VKD3D_TESSELLATOR_DOMAIN_TRIANGLE = 2, - VKD3D_TESSELLATOR_DOMAIN_QUAD = 3, -+ -+ VKD3D_TESSELLATOR_DOMAIN_COUNT = 4, - }; - - #define VKD3DSI_NONE 0x0 -@@ -1527,7 +1538,8 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); - --int glsl_compile(struct vsir_program *program, struct vkd3d_shader_code *out, -+int glsl_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context); - - #define SPIRV_MAX_SRC_COUNT 6 -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 7115a74a6f2..4a69ff530da 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -5414,6 +5414,26 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 - | ((colour->uint32[2] & 0x3ff) << 22); - return vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); - -+ case DXGI_FORMAT_B5G6R5_UNORM: -+ colour->uint32[0] = (colour->uint32[2] & 0x1f) -+ | ((colour->uint32[1] & 0x3f) << 5) -+ | ((colour->uint32[0] & 0x1f) << 11); -+ return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); -+ -+ case DXGI_FORMAT_B5G5R5A1_UNORM: -+ colour->uint32[0] = (colour->uint32[2] & 0x1f) -+ | ((colour->uint32[1] & 0x1f) << 5) -+ | ((colour->uint32[0] & 0x1f) << 10) -+ | ((colour->uint32[3] & 0x1) << 15); -+ return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); -+ -+ case DXGI_FORMAT_B4G4R4A4_UNORM: -+ colour->uint32[0] = 
(colour->uint32[2] & 0xf) -+ | ((colour->uint32[1] & 0xf) << 4) -+ | ((colour->uint32[0] & 0xf) << 8) -+ | ((colour->uint32[3] & 0xf) << 12); -+ return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); -+ - default: - return NULL; - } -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 90de27c53b6..a394e3f7592 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -89,6 +89,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = - VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), - VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), - /* EXT extensions */ -+ VK_EXTENSION(EXT_4444_FORMATS, EXT_4444_formats), - VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), - VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), - VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), -@@ -558,12 +559,14 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, - const struct vkd3d_optional_instance_extensions_info *optional_extensions; - const struct vkd3d_application_info *vkd3d_application_info; - const struct vkd3d_host_time_domain_info *time_domain_info; -+ PFN_vkEnumerateInstanceVersion vkEnumerateInstanceVersion; - bool *user_extension_supported = NULL; - VkApplicationInfo application_info; - VkInstanceCreateInfo instance_info; - char application_name[PATH_MAX]; - uint32_t extension_count; - const char **extensions; -+ uint32_t vk_api_version; - VkInstance vk_instance; - VkResult vr; - HRESULT hr; -@@ -616,6 +619,16 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, - application_info.apiVersion = VK_API_VERSION_1_0; - instance->api_version = VKD3D_API_VERSION_1_0; - -+ /* vkEnumerateInstanceVersion was added in Vulkan 1.1, and its absence indicates only 1.0 is supported. 
*/ -+ vkEnumerateInstanceVersion = (void *)vk_global_procs->vkGetInstanceProcAddr(NULL, "vkEnumerateInstanceVersion"); -+ if (vkEnumerateInstanceVersion && vkEnumerateInstanceVersion(&vk_api_version) >= 0 -+ && vk_api_version >= VK_API_VERSION_1_1) -+ { -+ TRACE("Vulkan API version 1.1 is available; requesting it.\n"); -+ application_info.apiVersion = VK_API_VERSION_1_1; -+ } -+ instance->vk_api_version = application_info.apiVersion; -+ - if ((vkd3d_application_info = vkd3d_find_struct(create_info->next, APPLICATION_INFO))) - { - if (vkd3d_application_info->application_name) -@@ -798,6 +811,7 @@ struct vkd3d_physical_device_info - VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features; - VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features; - VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT mutable_features; -+ VkPhysicalDevice4444FormatsFeaturesEXT formats4444_features; - - VkPhysicalDeviceFeatures2 features2; - }; -@@ -821,6 +835,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; - VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties; - VkPhysicalDevice physical_device = device->vk_physical_device; -+ VkPhysicalDevice4444FormatsFeaturesEXT *formats4444_features; - VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; - struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; - -@@ -839,6 +854,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - vertex_divisor_properties = &info->vertex_divisor_properties; - timeline_semaphore_features = &info->timeline_semaphore_features; - mutable_features = &info->mutable_features; -+ formats4444_features = &info->formats4444_features; - xfb_features = &info->xfb_features; - xfb_properties = &info->xfb_properties; - -@@ -866,6 +882,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - 
vk_prepend_struct(&info->features2, timeline_semaphore_features); - mutable_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; - vk_prepend_struct(&info->features2, mutable_features); -+ formats4444_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, formats4444_features); - - if (vulkan_info->KHR_get_physical_device_properties2) - VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); -@@ -1655,6 +1673,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - if (!physical_device_info->timeline_semaphore_features.timelineSemaphore) - vulkan_info->KHR_timeline_semaphore = false; - -+ physical_device_info->formats4444_features.formatA4B4G4R4 = VK_FALSE; -+ - vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties; - - if (get_spec_version(vk_extensions, count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3) -@@ -5117,6 +5137,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, - device->vk_info = instance->vk_info; - device->signal_event = instance->signal_event; - device->wchar_size = instance->wchar_size; -+ device->environment = (instance->vk_api_version >= VK_API_VERSION_1_1) -+ ? 
VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1 : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; - - device->adapter_luid = create_info->adapter_luid; - device->removed_reason = S_OK; -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index b8328216a29..6ba29c18004 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -2452,7 +2452,7 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st - - memset(&target_info, 0, sizeof(target_info)); - target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; -- target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; -+ target_info.environment = device->environment; - target_info.extensions = device->vk_info.shader_extensions; - target_info.extension_count = device->vk_info.shader_extension_count; - -@@ -3177,7 +3177,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s - ps_target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; - ps_target_info.next = NULL; - ps_target_info.entry_point = "main"; -- ps_target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; -+ ps_target_info.environment = device->environment; - ps_target_info.extensions = vk_info->shader_extensions; - ps_target_info.extension_count = vk_info->shader_extension_count; - ps_target_info.parameters = ps_shader_parameters; -@@ -3207,7 +3207,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s - - memset(&target_info, 0, sizeof(target_info)); - target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; -- target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; -+ target_info.environment = device->environment; - target_info.extensions = vk_info->shader_extensions; - target_info.extension_count = vk_info->shader_extension_count; - -diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c -index ac79ae5ddff..58747342b5c 100644 ---- 
a/libs/vkd3d/libs/vkd3d/utils.c -+++ b/libs/vkd3d/libs/vkd3d/utils.c -@@ -87,6 +87,8 @@ static const struct vkd3d_format vkd3d_formats[] = - {DXGI_FORMAT_R8_SNORM, VK_FORMAT_R8_SNORM, 1, 1, 1, 1, COLOR, 1}, - {DXGI_FORMAT_R8_SINT, VK_FORMAT_R8_SINT, 1, 1, 1, 1, COLOR, 1, SINT}, - {DXGI_FORMAT_A8_UNORM, VK_FORMAT_R8_UNORM, 1, 1, 1, 1, COLOR, 1}, -+ {DXGI_FORMAT_B5G6R5_UNORM, VK_FORMAT_R5G6B5_UNORM_PACK16, 2, 1, 1, 1, COLOR, 1}, -+ {DXGI_FORMAT_B5G5R5A1_UNORM, VK_FORMAT_A1R5G5B5_UNORM_PACK16, 2, 1, 1, 1, COLOR, 1}, - {DXGI_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, - {DXGI_FORMAT_B8G8R8X8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, - {DXGI_FORMAT_B8G8R8A8_TYPELESS, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1, TYPELESS}, -@@ -116,6 +118,9 @@ static const struct vkd3d_format vkd3d_formats[] = - {DXGI_FORMAT_BC7_UNORM_SRGB, VK_FORMAT_BC7_SRGB_BLOCK, 1, 4, 4, 16, COLOR, 1}, - }; - -+static const struct vkd3d_format format_b4g4r4a4 = -+ {DXGI_FORMAT_B4G4R4A4_UNORM, VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, 2, 1, 1, 1, COLOR, 1}; -+ - /* Each depth/stencil format is only compatible with itself in Vulkan. */ - static const struct vkd3d_format vkd3d_depth_stencil_formats[] = - { -@@ -449,6 +454,11 @@ const struct vkd3d_format *vkd3d_get_format(const struct d3d12_device *device, - return &vkd3d_formats[i]; - } - -+ /* Do not check VkPhysicalDevice4444FormatsFeaturesEXT because apps -+ * should query format support, which returns more detailed info. 
*/ -+ if (dxgi_format == format_b4g4r4a4.dxgi_format && device->vk_info.EXT_4444_formats) -+ return &format_b4g4r4a4; -+ - return NULL; - } - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -index e0345deaa0f..29305fbdc63 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -@@ -453,11 +453,10 @@ HRESULT vkd3d_serialize_root_signature(const D3D12_ROOT_SIGNATURE_DESC *desc, - if ((ret = vkd3d_shader_serialize_root_signature(&vkd3d_desc, &dxbc, &messages)) < 0) - { - WARN("Failed to serialize root signature, vkd3d result %d.\n", ret); -- if (error_blob && messages) -- { -- if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) -- ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); -- } -+ if (!error_blob) -+ vkd3d_shader_free_messages(messages); -+ else if (messages && FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) -+ ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); - return hresult_from_vkd3d_result(ret); - } - vkd3d_shader_free_messages(messages); -@@ -494,11 +493,10 @@ HRESULT vkd3d_serialize_versioned_root_signature(const D3D12_VERSIONED_ROOT_SIGN - if ((ret = vkd3d_shader_serialize_root_signature(vkd3d_desc, &dxbc, &messages)) < 0) - { - WARN("Failed to serialize root signature, vkd3d result %d.\n", ret); -- if (error_blob && messages) -- { -- if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) -- ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); -- } -+ if (!error_blob) -+ vkd3d_shader_free_messages(messages); -+ else if (messages && FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) -+ ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); - return hresult_from_vkd3d_result(ret); - } - vkd3d_shader_free_messages(messages); -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index 
9f5f759667a..e0a7acb306d 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -128,6 +128,7 @@ struct vkd3d_vulkan_info - bool KHR_sampler_mirror_clamp_to_edge; - bool KHR_timeline_semaphore; - /* EXT device extensions */ -+ bool EXT_4444_formats; - bool EXT_calibrated_timestamps; - bool EXT_conditional_rendering; - bool EXT_debug_marker; -@@ -185,6 +186,7 @@ struct vkd3d_instance - struct vkd3d_vulkan_info vk_info; - struct vkd3d_vk_global_procs vk_global_procs; - void *libvulkan; -+ uint32_t vk_api_version; - - uint64_t config_flags; - enum vkd3d_api_version api_version; -@@ -1678,6 +1680,7 @@ struct d3d12_device - struct vkd3d_vk_device_procs vk_procs; - PFN_vkd3d_signal_event signal_event; - size_t wchar_size; -+ enum vkd3d_shader_spirv_environment environment; - - struct vkd3d_gpu_va_allocator gpu_va_allocator; - --- -2.43.0 -