diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-9c83caeda652d7968c10e54cca2ae3b7fc1.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-9c83caeda652d7968c10e54cca2ae3b7fc1.patch deleted file mode 100644 index b3c21b8d..00000000 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-9c83caeda652d7968c10e54cca2ae3b7fc1.patch +++ /dev/null @@ -1,24033 +0,0 @@ -From 2089c41afa2160961884895871e674e743cb5144 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 7 Mar 2024 10:40:41 +1100 -Subject: [PATCH] Updated vkd3d to 9c83caeda652d7968c10e54cca2ae3b7fc18f384. - ---- - libs/vkd3d/Makefile.in | 1 + - libs/vkd3d/include/private/vkd3d_common.h | 261 +- - libs/vkd3d/include/private/vkd3d_memory.h | 11 +- - libs/vkd3d/include/vkd3d.h | 206 +- - libs/vkd3d/include/vkd3d_shader.h | 69 +- - libs/vkd3d/include/vkd3d_types.h | 2 + - libs/vkd3d/libs/vkd3d-common/blob.c | 3 +- - libs/vkd3d/libs/vkd3d-common/debug.c | 2 +- - libs/vkd3d/libs/vkd3d-common/error.c | 1 - - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 1190 +++--- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 290 +- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 39 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 2634 +++++++++++-- - libs/vkd3d/libs/vkd3d-shader/fx.c | 994 ++++- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 109 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 723 ++-- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 162 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 7 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 904 +++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 861 ++++- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 317 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 3271 +++++++++++++---- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 727 +++- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 421 +-- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 324 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 192 +- - libs/vkd3d/libs/vkd3d/cache.c | 59 + - libs/vkd3d/libs/vkd3d/command.c | 41 +- - libs/vkd3d/libs/vkd3d/device.c | 1034 +++++- - libs/vkd3d/libs/vkd3d/resource.c | 21 +- - libs/vkd3d/libs/vkd3d/state.c | 36 +- - libs/vkd3d/libs/vkd3d/utils.c | 34 + - libs/vkd3d/libs/vkd3d/vkd3d_main.c | 22 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 176 +- - 34 files changed, 11642 insertions(+), 3502 deletions(-) - create mode 100644 libs/vkd3d/libs/vkd3d/cache.c - -diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in -index 448e9a0e61d..94e4833dc9a 100644 ---- a/libs/vkd3d/Makefile.in -+++ b/libs/vkd3d/Makefile.in -@@ -30,6 +30,7 @@ SOURCES = \ - libs/vkd3d-shader/spirv.c \ - libs/vkd3d-shader/tpf.c \ - libs/vkd3d-shader/vkd3d_shader_main.c \ -+ libs/vkd3d/cache.c \ - libs/vkd3d/command.c \ - libs/vkd3d/device.c \ - libs/vkd3d/resource.c \ -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 6a3b530d868..b0e9230dab6 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -30,6 +30,9 @@ - #include - #include - #include -+#ifndef _WIN32 -+#include -+#endif - - #ifdef _MSC_VER - #include -@@ -72,6 +75,8 @@ - #define TAG_XNAP VKD3D_MAKE_TAG('X', 'N', 'A', 'P') - #define TAG_XNAS VKD3D_MAKE_TAG('X', 'N', 'A', 'S') - -+#define TAG_RD11_REVERSE 0x25441313 -+ - static inline uint64_t align(uint64_t addr, size_t alignment) - { - return (addr + (alignment - 1)) & ~(alignment - 1); -@@ -105,11 +110,130 @@ VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsig - #define vkd3d_unreachable() vkd3d_unreachable_(__FILE__, __LINE__) - #endif - -+#ifdef 
VKD3D_NO_TRACE_MESSAGES -+#define TRACE(args...) do { } while (0) -+#define TRACE_ON() (false) -+#endif -+ -+#ifdef VKD3D_NO_DEBUG_MESSAGES -+#define WARN(args...) do { } while (0) -+#define FIXME(args...) do { } while (0) -+#endif -+ -+enum vkd3d_dbg_level -+{ -+ VKD3D_DBG_LEVEL_NONE, -+ VKD3D_DBG_LEVEL_ERR, -+ VKD3D_DBG_LEVEL_FIXME, -+ VKD3D_DBG_LEVEL_WARN, -+ VKD3D_DBG_LEVEL_TRACE, -+}; -+ -+enum vkd3d_dbg_level vkd3d_dbg_get_level(void); -+ -+void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4); -+void vkd3d_dbg_set_log_callback(PFN_vkd3d_log callback); -+ -+const char *vkd3d_dbg_sprintf(const char *fmt, ...) VKD3D_PRINTF_FUNC(1, 2); -+const char *vkd3d_dbg_vsprintf(const char *fmt, va_list args); -+const char *debugstr_a(const char *str); -+const char *debugstr_an(const char *str, size_t n); -+const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); -+ -+#define VKD3D_DBG_LOG(level) \ -+ do { \ -+ const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ -+ VKD3D_DBG_PRINTF -+ -+#define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ -+ do { \ -+ static bool vkd3d_dbg_next_time; \ -+ const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ -+ ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ -+ vkd3d_dbg_next_time = true; \ -+ VKD3D_DBG_PRINTF -+ -+#define VKD3D_DBG_PRINTF(...) \ -+ vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) -+ -+#ifndef TRACE -+#define TRACE VKD3D_DBG_LOG(TRACE) -+#endif -+ -+#ifndef WARN -+#define WARN VKD3D_DBG_LOG(WARN) -+#endif -+ -+#ifndef FIXME -+#define FIXME VKD3D_DBG_LOG(FIXME) -+#endif -+ -+#define ERR VKD3D_DBG_LOG(ERR) -+ -+#ifndef TRACE_ON -+#define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) -+#endif -+ -+#ifndef WARN_ON -+#define WARN_ON() (vkd3d_dbg_get_level() >= VKD3D_DBG_LEVEL_WARN) -+#endif -+ -+#define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) -+ -+#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name -+ -+static inline const char *debugstr_guid(const GUID *guid) -+{ -+ if (!guid) -+ return "(null)"; -+ -+ return vkd3d_dbg_sprintf("{%08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x}", -+ (unsigned long)guid->Data1, guid->Data2, guid->Data3, guid->Data4[0], -+ guid->Data4[1], guid->Data4[2], guid->Data4[3], guid->Data4[4], -+ guid->Data4[5], guid->Data4[6], guid->Data4[7]); -+} -+ -+static inline const char *debugstr_hresult(HRESULT hr) -+{ -+ switch (hr) -+ { -+#define TO_STR(u) case u: return #u; -+ TO_STR(S_OK) -+ TO_STR(S_FALSE) -+ TO_STR(E_NOTIMPL) -+ TO_STR(E_NOINTERFACE) -+ TO_STR(E_POINTER) -+ TO_STR(E_ABORT) -+ TO_STR(E_FAIL) -+ TO_STR(E_OUTOFMEMORY) -+ TO_STR(E_INVALIDARG) -+ TO_STR(DXGI_ERROR_NOT_FOUND) -+ TO_STR(DXGI_ERROR_MORE_DATA) -+ TO_STR(DXGI_ERROR_UNSUPPORTED) -+#undef TO_STR -+ default: -+ return vkd3d_dbg_sprintf("%#x", (int)hr); -+ } -+} -+ -+unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value); -+ -+struct vkd3d_debug_option -+{ -+ const char *name; -+ uint64_t flag; -+}; -+ -+bool vkd3d_debug_list_has_member(const char *string, const char *member); -+uint64_t vkd3d_parse_debug_options(const char *string, -+ const struct vkd3d_debug_option *options, unsigned int option_count); -+void vkd3d_set_thread_name(const char *name); -+ - static inline unsigned int vkd3d_popcount(unsigned int v) - { - #ifdef _MSC_VER - return __popcnt(v); --#elif defined(__MINGW32__) -+#elif defined(HAVE_BUILTIN_POPCOUNT) - return 
__builtin_popcount(v); - #else - v -= (v >> 1) & 0x55555555; -@@ -216,6 +340,8 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) - return (x > y) - (x < y); - } - -+#define VKD3D_BITMAP_SIZE(x) (((x) + 0x1f) >> 5) -+ - static inline bool bitmap_clear(uint32_t *map, unsigned int idx) - { - return map[idx >> 5] &= ~(1u << (idx & 0x1f)); -@@ -305,6 +431,139 @@ static inline uint32_t vkd3d_atomic_increment_u32(uint32_t volatile *x) - return vkd3d_atomic_add_fetch_u32(x, 1); - } - -+struct vkd3d_mutex -+{ -+#ifdef _WIN32 -+ CRITICAL_SECTION lock; -+#else -+ pthread_mutex_t lock; -+#endif -+}; -+ -+#ifdef _WIN32 -+#define VKD3D_MUTEX_INITIALIZER {{NULL, -1, 0, 0, 0, 0}} -+#else -+#define VKD3D_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER -+#endif -+ -+static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) -+{ -+#ifdef _WIN32 -+ InitializeCriticalSection(&lock->lock); -+#else -+ int ret; -+ -+ if ((ret = pthread_mutex_init(&lock->lock, NULL))) -+ ERR("Failed to initialise the mutex, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) -+{ -+#ifdef _WIN32 -+ EnterCriticalSection(&lock->lock); -+#else -+ int ret; -+ -+ if ((ret = pthread_mutex_lock(&lock->lock))) -+ ERR("Failed to lock the mutex, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) -+{ -+#ifdef _WIN32 -+ LeaveCriticalSection(&lock->lock); -+#else -+ int ret; -+ -+ if ((ret = pthread_mutex_unlock(&lock->lock))) -+ ERR("Failed to unlock the mutex, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) -+{ -+#ifdef _WIN32 -+ DeleteCriticalSection(&lock->lock); -+#else -+ int ret; -+ -+ if ((ret = pthread_mutex_destroy(&lock->lock))) -+ ERR("Failed to destroy the mutex, ret %d.\n", ret); -+#endif -+} -+ -+struct vkd3d_cond -+{ -+#ifdef _WIN32 -+ CONDITION_VARIABLE cond; -+#else -+ pthread_cond_t cond; -+#endif -+}; -+ -+static inline void vkd3d_cond_init(struct vkd3d_cond *cond) -+{ -+#ifdef _WIN32 -+ InitializeConditionVariable(&cond->cond); -+#else -+ int ret; -+ -+ if ((ret = pthread_cond_init(&cond->cond, NULL))) -+ ERR("Failed to initialise the condition variable, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) -+{ -+#ifdef _WIN32 -+ WakeConditionVariable(&cond->cond); -+#else -+ int ret; -+ -+ if ((ret = pthread_cond_signal(&cond->cond))) -+ ERR("Failed to signal the condition variable, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) -+{ -+#ifdef _WIN32 -+ WakeAllConditionVariable(&cond->cond); -+#else -+ int ret; -+ -+ if ((ret = pthread_cond_broadcast(&cond->cond))) -+ ERR("Failed to broadcast the condition variable, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) -+{ -+#ifdef _WIN32 -+ if (!SleepConditionVariableCS(&cond->cond, &lock->lock, INFINITE)) -+ ERR("Failed to wait on the condition variable, error %lu.\n", GetLastError()); -+#else -+ int ret; -+ -+ if ((ret = pthread_cond_wait(&cond->cond, &lock->lock))) -+ ERR("Failed to wait on the condition variable, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) -+{ -+#ifdef _WIN32 -+ /* Nothing to do. 
*/ -+#else -+ int ret; -+ -+ if ((ret = pthread_cond_destroy(&cond->cond))) -+ ERR("Failed to destroy the condition variable, ret %d.\n", ret); -+#endif -+} -+ - static inline void vkd3d_parse_version(const char *version, int *major, int *minor) - { - *major = atoi(version); -diff --git a/libs/vkd3d/include/private/vkd3d_memory.h b/libs/vkd3d/include/private/vkd3d_memory.h -index 8a2edb1000d..682d35c03c6 100644 ---- a/libs/vkd3d/include/private/vkd3d_memory.h -+++ b/libs/vkd3d/include/private/vkd3d_memory.h -@@ -24,7 +24,7 @@ - #include - #include - --#include "vkd3d_debug.h" -+#include "vkd3d_common.h" - - static inline void *vkd3d_malloc(size_t size) - { -@@ -65,6 +65,15 @@ static inline char *vkd3d_strdup(const char *string) - return ptr; - } - -+static inline void *vkd3d_memdup(const void *mem, size_t size) -+{ -+ void *ptr; -+ -+ if ((ptr = vkd3d_malloc(size))) -+ memcpy(ptr, mem, size); -+ return ptr; -+} -+ - bool vkd3d_array_reserve(void **elements, size_t *capacity, size_t element_count, size_t element_size); - - #endif /* __VKD3D_MEMORY_H */ -diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h -index aa68b70e1bf..38249f0bf5c 100644 ---- a/libs/vkd3d/include/vkd3d.h -+++ b/libs/vkd3d/include/vkd3d.h -@@ -46,21 +46,37 @@ extern "C" { - * \since 1.0 - */ - -+/** The type of a chained structure. */ - enum vkd3d_structure_type - { -- /* 1.0 */ -+ /** The structure is a vkd3d_instance_create_info structure. */ - VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, -+ /** The structure is a vkd3d_device_create_info structure. */ - VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO, -+ /** The structure is a vkd3d_image_resource_create_info structure. */ - VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO, - -- /* 1.1 */ -+ /** -+ * The structure is a vkd3d_optional_instance_extensions_info structure. -+ * \since 1.1 -+ */ - VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO, - -- /* 1.2 */ -+ /** -+ * The structure is a vkd3d_optional_device_extensions_info structure. -+ * \since 1.2 -+ */ - VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO, -+ /** -+ * The structure is a vkd3d_application_info structure. -+ * \since 1.2 -+ */ - VKD3D_STRUCTURE_TYPE_APPLICATION_INFO, - -- /* 1.3 */ -+ /** -+ * The structure is a vkd3d_host_time_domain_info structure. -+ * \since 1.3 -+ */ - VKD3D_STRUCTURE_TYPE_HOST_TIME_DOMAIN_INFO, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_STRUCTURE_TYPE), -@@ -93,98 +109,262 @@ typedef HRESULT (*PFN_vkd3d_join_thread)(void *thread); - - struct vkd3d_instance; - -+/** -+ * A chained structure containing instance creation parameters. -+ */ - struct vkd3d_instance_create_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** An pointer to a function to signal events. */ - PFN_vkd3d_signal_event pfn_signal_event; -+ /** -+ * An optional pointer to a function to create threads. If this is NULL vkd3d will use a -+ * function of its choice, depending on the platform. It must be NULL if and only if -+ * pfn_join_thread is NULL. -+ */ - PFN_vkd3d_create_thread pfn_create_thread; -+ /** -+ * An optional pointer to a function to join threads. If this is NULL vkd3d will use a -+ * function of its choice, depending on the platform. It must be NULL if and only if -+ * pfn_create_thread is NULL. -+ */ - PFN_vkd3d_join_thread pfn_join_thread; -+ /** The size of type WCHAR. It must be 2 or 4 and should normally be set to sizeof(WCHAR). 
*/ - size_t wchar_size; - -- /* If set to NULL, libvkd3d loads libvulkan. */ -+ /** -+ * A pointer to the vkGetInstanceProcAddr Vulkan function, which will be used to load all the -+ * other Vulkan functions. If set to NULL, vkd3d will search and use the Vulkan loader. -+ */ - PFN_vkGetInstanceProcAddr pfn_vkGetInstanceProcAddr; - -+ /** -+ * A list of Vulkan instance extensions to request. They are intended as required, so instance -+ * creation will fail if any of them is not available. -+ */ - const char * const *instance_extensions; -+ /** The number of elements in the instance_extensions array. */ - uint32_t instance_extension_count; - }; - --/* Extends vkd3d_instance_create_info. Available since 1.1. */ -+/** -+ * A chained structure to specify optional instance extensions. -+ * -+ * This structure extends vkd3d_instance_create_info. -+ * -+ * \since 1.1 -+ */ - struct vkd3d_optional_instance_extensions_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** -+ * A list of optional Vulkan instance extensions to request. Instance creation does not fail if -+ * they are not available. -+ */ - const char * const *extensions; -+ /** The number of elements in the extensions array. */ - uint32_t extension_count; - }; - --/* Extends vkd3d_instance_create_info. Available since 1.2. */ -+/** -+ * A chained structure to specify application information. -+ * -+ * This structure extends vkd3d_instance_create_info. -+ * -+ * \since 1.2 -+ */ - struct vkd3d_application_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_APPLICATION_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** -+ * The application's name, to be passed to the Vulkan implementation. If it is NULL, a name is -+ * computed from the process executable filename. If that cannot be done, the empty string is -+ * used. -+ */ - const char *application_name; -+ /** The application's version, to be passed to the Vulkan implementation. */ - uint32_t application_version; - -- const char *engine_name; /* "vkd3d" if NULL */ -- uint32_t engine_version; /* vkd3d version if engine_name is NULL */ -- -+ /** -+ * The engine name, to be passed to the Vulkan implementation. If it is NULL, "vkd3d" is used. -+ */ -+ const char *engine_name; -+ /** -+ * The engine version, to be passed to the Vulkan implementation. If it is 0, the version is -+ * computed from the vkd3d library version. -+ */ -+ uint32_t engine_version; -+ -+ /** -+ * The vkd3d API version to use, to guarantee backward compatibility of the shared library. If -+ * this chained structure is not used then VKD3D_API_VERSION_1_0 is used. -+ */ - enum vkd3d_api_version api_version; - }; - --/* Extends vkd3d_instance_create_info. Available since 1.3. */ -+/** -+ * A chained structure to specify the host time domain. -+ * -+ * This structure extends vkd3d_instance_create_info. -+ * -+ * \since 1.3 -+ */ - struct vkd3d_host_time_domain_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_HOST_TIME_DOMAIN_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** -+ * The number of clock ticks per second, used for GetClockCalibration(). It should normally -+ * match the expected result of QueryPerformanceFrequency(). 
If this chained structure is not -+ * used then 10 millions is used, which means that each tick is a tenth of microsecond, or -+ * equivalently 100 nanoseconds. -+ */ - uint64_t ticks_per_second; - }; - -+/** -+ * A chained structure containing device creation parameters. -+ */ - struct vkd3d_device_create_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** The minimum feature level to request. Device creation will fail with E_INVALIDARG if the -+ * Vulkan device doesn't have the features needed to fulfill the request. */ - D3D_FEATURE_LEVEL minimum_feature_level; - -+ /** -+ * The vkd3d instance to use to create a device. Either this or instance_create_info must be -+ * set. -+ */ - struct vkd3d_instance *instance; -+ /** -+ * The parameters used to create an instance, which is then used to create a device. Either -+ * this or instance must be set. -+ */ - const struct vkd3d_instance_create_info *instance_create_info; - -+ /** -+ * The Vulkan physical device to use. If it is NULL, the first physical device found is used, -+ * prioritizing discrete GPUs over integrated GPUs and integrated GPUs over all the others. -+ * -+ * This parameter can be overridden by setting environment variable VKD3D_VULKAN_DEVICE. -+ */ - VkPhysicalDevice vk_physical_device; - -+ /** -+ * A list of Vulkan device extensions to request. They are intended as required, so device -+ * creation will fail if any of them is not available. -+ */ - const char * const *device_extensions; -+ /** The number of elements in the device_extensions array. */ - uint32_t device_extension_count; - -+ /** -+ * An object to be set as the device parent. This is not used by vkd3d except for being -+ * returned by vkd3d_get_device_parent. -+ */ - IUnknown *parent; -+ /** -+ * The adapter LUID to be set for the device. This is not used by vkd3d except for being -+ * returned by GetAdapterLuid. -+ */ - LUID adapter_luid; - }; - --/* Extends vkd3d_device_create_info. Available since 1.2. */ -+/** -+ * A chained structure to specify optional device extensions. -+ * -+ * This structure extends vkd3d_device_create_info. -+ * -+ * \since 1.2 -+ */ - struct vkd3d_optional_device_extensions_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** -+ * A list of optional Vulkan device extensions to request. Device creation does not fail if -+ * they are not available. -+ */ - const char * const *extensions; -+ /** The number of elements in the extensions array. */ - uint32_t extension_count; - }; - --/* vkd3d_image_resource_create_info flags */ -+/** -+ * When specified as a flag of vkd3d_image_resource_create_info, it means that vkd3d will do the -+ * initial transition operation on the image from VK_IMAGE_LAYOUT_UNDEFINED to its appropriate -+ * Vulkan layout (depending on its D3D12 resource state). If this flag is not specified the caller -+ * is responsible for transitioning the Vulkan image to the appropriate layout. -+ */ - #define VKD3D_RESOURCE_INITIAL_STATE_TRANSITION 0x00000001 -+/** -+ * When specified as a flag of vkd3d_image_resource_create_info, it means that field present_state -+ * is honored. 
-+ */ - #define VKD3D_RESOURCE_PRESENT_STATE_TRANSITION 0x00000002 - -+/** -+ * A chained structure containing the parameters to create a D3D12 resource backed by a Vulkan -+ * image. -+ */ - struct vkd3d_image_resource_create_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** The Vulkan image that backs the resource. */ - VkImage vk_image; -+ /** The resource description. */ - D3D12_RESOURCE_DESC desc; -+ /** -+ * A combination of zero or more flags. The valid flags are -+ * VKD3D_RESOURCE_INITIAL_STATE_TRANSITION and VKD3D_RESOURCE_PRESENT_STATE_TRANSITION. -+ */ - unsigned int flags; -+ /** -+ * This field specifies how to handle resource state D3D12_RESOURCE_STATE_PRESENT for -+ * the resource. Notice that on D3D12 there is no difference between -+ * D3D12_RESOURCE_STATE_COMMON and D3D12_RESOURCE_STATE_PRESENT (they have the same value), -+ * while on Vulkan two different layouts are used (VK_IMAGE_LAYOUT_GENERAL and -+ * VK_IMAGE_LAYOUT_PRESENT_SRC_KHR). -+ * -+ * * When flag VKD3D_RESOURCE_PRESENT_STATE_TRANSITION is not specified, field -+ * present_state is ignored and resource state D3D12_RESOURCE_STATE_COMMON/_PRESENT is -+ * mapped to VK_IMAGE_LAYOUT_GENERAL; this is useful for non-swapchain resources. -+ * * Otherwise, when present_state is D3D12_RESOURCE_STATE_PRESENT/_COMMON, resource state -+ * D3D12_RESOURCE_STATE_COMMON/_PRESENT is mapped to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; -+ * this is useful for swapchain resources that are directly backed by a Vulkan swapchain -+ * image. -+ * * Otherwise, resource state D3D12_RESOURCE_STATE_COMMON/_PRESENT is treated as resource -+ * state present_state; this is useful for swapchain resources that backed by a Vulkan -+ * non-swapchain image, which the client will likely consume with a copy or drawing -+ * operation at presentation time. -+ */ - D3D12_RESOURCE_STATES present_state; - }; - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 9e663919c38..2b32b8a3e98 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -21,6 +21,7 @@ - - #include - #include -+#include - #include - - #ifdef __cplusplus -@@ -148,6 +149,12 @@ enum vkd3d_shader_compile_option_formatting_flags - VKD3D_SHADER_COMPILE_OPTION_FORMATTING_OFFSETS = 0x00000004, - VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER = 0x00000008, - VKD3D_SHADER_COMPILE_OPTION_FORMATTING_RAW_IDS = 0x00000010, -+ /** -+ * Emit the signatures when disassembling a shader. -+ * -+ * \since 1.12 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES = 0x00000020, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), - }; -@@ -208,10 +215,33 @@ enum vkd3d_shader_compile_option_feature_flags - * This corresponds to the "shaderFloat64" feature in the Vulkan API, and - * the "GL_ARB_gpu_shader_fp64" extension in the OpenGL API. */ - VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLOAT64 = 0x00000002, -+ /** The SPIR-V target environment supports wave operations. -+ * This flag is valid only in VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1 -+ * or greater, and corresponds to the following minimum requirements in -+ * VkPhysicalDeviceSubgroupProperties: -+ * - subgroupSize >= 4. -+ * - supportedOperations has BASIC, VOTE, ARITHMETIC, BALLOT, SHUFFLE and -+ * QUAD bits set. -+ * - supportedStages include COMPUTE and FRAGMENT. 
\since 1.12 */ -+ VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS = 0x00000004, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLAGS), - }; - -+/** -+ * Flags for vkd3d_shader_parse_dxbc(). -+ * -+ * \since 1.12 -+ */ -+enum vkd3d_shader_parse_dxbc_flags -+{ -+ /** Ignore the checksum and continue parsing even if it is -+ * incorrect. */ -+ VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM = 0x00000001, -+ -+ VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARSE_DXBC_FLAGS), -+}; -+ - enum vkd3d_shader_compile_option_name - { - /** -@@ -279,6 +309,36 @@ enum vkd3d_shader_compile_option_name - * \since 1.11 - */ - VKD3D_SHADER_COMPILE_OPTION_FEATURE = 0x0000000a, -+ /** -+ * If \a value is non-zero compilation will produce a child effect using -+ * shared object descriptions, as instructed by the "shared" modifier. -+ * Child effects are supported with fx_4_0, and fx_4_1 profiles. This option -+ * and "shared" modifiers are ignored for the fx_5_0 profile and non-fx profiles. -+ * The fx_2_0 profile does not have a separate concept of child effects, variables -+ * marked with "shared" modifier will be marked as such in a binary. -+ * -+ * \since 1.12 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT = 0x0000000b, -+ /** -+ * If \a value is nonzero, emit a compile warning warn when vectors or -+ * matrices are truncated in an implicit conversion. -+ * If warnings are disabled, this option has no effect. -+ * This option has no effects for targets other than HLSL. -+ * -+ * The default value is nonzero, i.e. enable implicit truncation warnings. -+ * -+ * \since 1.12 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION = 0x0000000c, -+ /** -+ * If \a value is nonzero, empty constant buffers descriptions are -+ * written out in the output effect binary. This option applies only -+ * to fx_4_0 and fx_4_1 profiles and is otherwise ignored. -+ * -+ * \since 1.12 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS = 0x0000000d, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), - }; -@@ -872,6 +932,8 @@ enum vkd3d_shader_spirv_environment - VKD3D_SHADER_SPIRV_ENVIRONMENT_NONE, - VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5, - VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0, /* default target */ -+ /** \since 1.12 */ -+ VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_ENVIRONMENT), - }; -@@ -886,6 +948,8 @@ enum vkd3d_shader_spirv_extension - VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT, - /** \since 1.11 */ - VKD3D_SHADER_SPIRV_EXTENSION_EXT_VIEWPORT_INDEX_LAYER, -+ /** \since 1.12 */ -+ VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_EXTENSION), - }; -@@ -2377,9 +2441,8 @@ VKD3D_SHADER_API void vkd3d_shader_free_dxbc(struct vkd3d_shader_dxbc_desc *dxbc - * - * \param dxbc A vkd3d_shader_code structure containing the DXBC blob to parse. - * -- * \param flags A set of flags modifying the behaviour of the function. No -- * flags are defined for this version of vkd3d-shader, and this parameter -- * should be set to 0. -+ * \param flags A combination of zero or more elements of enum -+ * vkd3d_shader_parse_dxbc_flags. - * - * \param desc A vkd3d_shader_dxbc_desc structure describing the contents of - * the DXBC blob. 
Its vkd3d_shader_dxbc_section_desc structures will contain -diff --git a/libs/vkd3d/include/vkd3d_types.h b/libs/vkd3d/include/vkd3d_types.h -index 12ceef42fc4..017eaf11806 100644 ---- a/libs/vkd3d/include/vkd3d_types.h -+++ b/libs/vkd3d/include/vkd3d_types.h -@@ -41,6 +41,8 @@ enum vkd3d_result - { - /** Success. */ - VKD3D_OK = 0, -+ /** Success as a result of there being nothing to do. */ -+ VKD3D_FALSE = 1, - /** An unspecified failure occurred. */ - VKD3D_ERROR = -1, - /** There are not enough resources available to complete the operation. */ -diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c -index 06a12ef5bc4..6bc95dc55c4 100644 ---- a/libs/vkd3d/libs/vkd3d-common/blob.c -+++ b/libs/vkd3d/libs/vkd3d-common/blob.c -@@ -17,11 +17,12 @@ - */ - - #define COBJMACROS -+ - #define CONST_VTABLE - #include "vkd3d.h" - #include "vkd3d_blob.h" --#include "vkd3d_debug.h" - #include "vkd3d_memory.h" -+#include "d3d12shader.h" - - struct vkd3d_blob - { -diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c -index e12cd39450a..4523fc997ef 100644 ---- a/libs/vkd3d/libs/vkd3d-common/debug.c -+++ b/libs/vkd3d/libs/vkd3d-common/debug.c -@@ -20,7 +20,7 @@ - # define _WIN32_WINNT 0x0600 /* For InitOnceExecuteOnce(). */ - #endif - --#include "vkd3d_debug.h" -+#include "vkd3d_common.h" - - #include - #include -diff --git a/libs/vkd3d/libs/vkd3d-common/error.c b/libs/vkd3d/libs/vkd3d-common/error.c -index 3572669ac1c..b8350a5404c 100644 ---- a/libs/vkd3d/libs/vkd3d-common/error.c -+++ b/libs/vkd3d/libs/vkd3d-common/error.c -@@ -17,7 +17,6 @@ - */ - - #include "vkd3d_common.h" --#include "vkd3d_debug.h" - - HRESULT hresult_from_vkd3d_result(int vkd3d_result) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 3f86bd45960..9abc2c4db70 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -250,6 +250,7 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_NOT ] = "not", - [VKD3DSIH_NRM ] = "nrm", - [VKD3DSIH_OR ] = "or", -+ [VKD3DSIH_ORD ] = "ord", - [VKD3DSIH_PHASE ] = "phase", - [VKD3DSIH_PHI ] = "phi", - [VKD3DSIH_POW ] = "pow", -@@ -321,44 +322,34 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_UMAX ] = "umax", - [VKD3DSIH_UMIN ] = "umin", - [VKD3DSIH_UMUL ] = "umul", -+ [VKD3DSIH_UNO ] = "uno", - [VKD3DSIH_USHR ] = "ushr", - [VKD3DSIH_UTOD ] = "utod", - [VKD3DSIH_UTOF ] = "utof", - [VKD3DSIH_UTOU ] = "utou", -+ [VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL ] = "wave_active_all_equal", -+ [VKD3DSIH_WAVE_ACTIVE_BALLOT ] = "wave_active_ballot", -+ [VKD3DSIH_WAVE_ACTIVE_BIT_AND ] = "wave_active_bit_and", -+ [VKD3DSIH_WAVE_ACTIVE_BIT_OR ] = "wave_active_bit_or", -+ [VKD3DSIH_WAVE_ACTIVE_BIT_XOR ] = "wave_active_bit_xor", -+ [VKD3DSIH_WAVE_ALL_BIT_COUNT ] = "wave_all_bit_count", -+ [VKD3DSIH_WAVE_ALL_TRUE ] = "wave_all_true", -+ [VKD3DSIH_WAVE_ANY_TRUE ] = "wave_any_true", -+ [VKD3DSIH_WAVE_IS_FIRST_LANE ] = "wave_is_first_lane", -+ [VKD3DSIH_WAVE_OP_ADD ] = "wave_op_add", -+ [VKD3DSIH_WAVE_OP_IMAX ] = "wave_op_imax", -+ [VKD3DSIH_WAVE_OP_IMIN ] = "wave_op_imin", -+ [VKD3DSIH_WAVE_OP_MAX ] = "wave_op_max", -+ [VKD3DSIH_WAVE_OP_MIN ] = "wave_op_min", -+ [VKD3DSIH_WAVE_OP_MUL ] = "wave_op_mul", -+ [VKD3DSIH_WAVE_OP_UMAX ] = "wave_op_umax", -+ [VKD3DSIH_WAVE_OP_UMIN ] = "wave_op_umin", -+ [VKD3DSIH_WAVE_PREFIX_BIT_COUNT ] = "wave_prefix_bit_count", -+ [VKD3DSIH_WAVE_READ_LANE_AT ] = "wave_read_lane_at", -+ 
[VKD3DSIH_WAVE_READ_LANE_FIRST ] = "wave_read_lane_first", - [VKD3DSIH_XOR ] = "xor", - }; - --static const struct --{ -- enum vkd3d_shader_input_sysval_semantic sysval_semantic; -- const char *sysval_name; --} --shader_input_sysval_semantic_names[] = --{ -- {VKD3D_SIV_POSITION, "position"}, -- {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, -- {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, -- {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, -- {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, -- {VKD3D_SIV_VERTEX_ID, "vertex_id"}, -- {VKD3D_SIV_INSTANCE_ID, "instance_id"}, -- {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, -- {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, -- {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, -- {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, -- {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, -- {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, -- {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, -- {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, -- {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, -- {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, -- {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, -- {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, -- {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, -- {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, -- {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, --}; -- - struct vkd3d_d3d_asm_colours - { - const char *reset; -@@ -370,6 +361,7 @@ struct vkd3d_d3d_asm_colours - const char *swizzle; - const char *version; - const char *write_mask; -+ const char *label; - }; - - struct vkd3d_d3d_asm_compiler -@@ -377,22 +369,10 @@ struct vkd3d_d3d_asm_compiler - struct vkd3d_string_buffer buffer; - struct vkd3d_shader_version shader_version; - struct vkd3d_d3d_asm_colours colours; -- enum vsir_asm_dialect dialect; -+ enum vsir_asm_flags flags; - const struct vkd3d_shader_instruction *current; - }; - --static int VKD3D_PRINTF_FUNC(2, 3) shader_addline(struct vkd3d_string_buffer *buffer, const char *format, ...) --{ -- va_list args; -- int ret; -- -- va_start(args, format); -- ret = vkd3d_string_buffer_vprintf(buffer, format, args); -- va_end(args); -- -- return ret; --} -- - /* Convert floating point offset relative to a register file to an absolute - * offset for float constants. 
*/ - static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type register_type, UINT register_idx) -@@ -445,6 +425,23 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, - vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags); - } - -+static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags) -+{ -+ if (atomic_flags & VKD3DARF_SEQ_CST) -+ { -+ vkd3d_string_buffer_printf(&compiler->buffer, "_seqCst"); -+ atomic_flags &= ~VKD3DARF_SEQ_CST; -+ } -+ if (atomic_flags & VKD3DARF_VOLATILE) -+ { -+ vkd3d_string_buffer_printf(&compiler->buffer, "_volatile"); -+ atomic_flags &= ~VKD3DARF_VOLATILE; -+ } -+ -+ if (atomic_flags) -+ vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", atomic_flags); -+} -+ - static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags) - { - if (sync_flags & VKD3DSSF_GLOBAL_UAV) -@@ -511,96 +508,138 @@ static void shader_dump_uav_flags(struct vkd3d_d3d_asm_compiler *compiler, uint3 - vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", uav_flags); - } - --static void shader_dump_tessellator_domain(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_tessellator_domain domain) -+static void shader_print_tessellator_domain(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, enum vkd3d_tessellator_domain d, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ const char *domain; - -- shader_addline(buffer, "domain_"); -- switch (domain) -+ switch (d) - { - case VKD3D_TESSELLATOR_DOMAIN_LINE: -- shader_addline(buffer, "isoline"); -+ domain = "domain_isoline"; - break; - case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -- shader_addline(buffer, "tri"); -+ domain = "domain_tri"; - break; - case VKD3D_TESSELLATOR_DOMAIN_QUAD: -- shader_addline(buffer, "quad"); -+ domain = "domain_quad"; - break; - default: -- shader_addline(buffer, "unknown_tessellator_domain(%#x)", domain); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, d, compiler->colours.reset, suffix); -+ return; - } -+ -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, domain, suffix); - } - --static void shader_dump_tessellator_output_primitive(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_shader_tessellator_output_primitive output_primitive) -+static void shader_print_tessellator_output_primitive(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, enum vkd3d_shader_tessellator_output_primitive p, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ const char *primitive; - -- shader_addline(buffer, "output_"); -- switch (output_primitive) -+ switch (p) - { - case VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT: -- shader_addline(buffer, "point"); -+ primitive = "output_point"; - break; - case VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE: -- shader_addline(buffer, "line"); -+ primitive = "output_line"; - break; - case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW: -- shader_addline(buffer, "triangle_cw"); -+ primitive = "output_triangle_cw"; - break; - case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW: -- shader_addline(buffer, "triangle_ccw"); -+ primitive = "output_triangle_ccw"; - break; - default: -- shader_addline(buffer, "unknown_tessellator_output_primitive(%#x)", output_primitive); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, p, 
compiler->colours.reset, suffix); -+ return; - } -+ -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive, suffix); - } - --static void shader_dump_tessellator_partitioning(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_shader_tessellator_partitioning partitioning) -+static void shader_print_tessellator_partitioning(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, enum vkd3d_shader_tessellator_partitioning p, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ const char *partitioning; - -- shader_addline(buffer, "partitioning_"); -- switch (partitioning) -+ switch (p) - { - case VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER: -- shader_addline(buffer, "integer"); -+ partitioning = "partitioning_integer"; - break; - case VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2: -- shader_addline(buffer, "pow2"); -+ partitioning = "partitioning_pow2"; - break; - case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: -- shader_addline(buffer, "fractional_odd"); -+ partitioning = "partitioning_fractional_odd"; - break; - case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: -- shader_addline(buffer, "fractional_even"); -+ partitioning = "partitioning_fractional_even"; - break; - default: -- shader_addline(buffer, "unknown_tessellator_partitioning(%#x)", partitioning); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, p, compiler->colours.reset, suffix); -+ return; - } -+ -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, partitioning, suffix); - } - --static void shader_dump_shader_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_shader_input_sysval_semantic semantic) -+static void shader_print_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, enum vkd3d_shader_input_sysval_semantic semantic, const char *suffix) - { - unsigned int i; - -+ static const struct -+ { -+ enum vkd3d_shader_input_sysval_semantic sysval_semantic; -+ const char *sysval_name; -+ } -+ shader_input_sysval_semantic_names[] = -+ { -+ {VKD3D_SIV_POSITION, "position"}, -+ {VKD3D_SIV_CLIP_DISTANCE, "clip_distance"}, -+ {VKD3D_SIV_CULL_DISTANCE, "cull_distance"}, -+ {VKD3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"}, -+ {VKD3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"}, -+ {VKD3D_SIV_VERTEX_ID, "vertex_id"}, -+ {VKD3D_SIV_INSTANCE_ID, "instance_id"}, -+ {VKD3D_SIV_PRIMITIVE_ID, "primitive_id"}, -+ {VKD3D_SIV_IS_FRONT_FACE, "is_front_face"}, -+ {VKD3D_SIV_SAMPLE_INDEX, "sample_index"}, -+ {VKD3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"}, -+ {VKD3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"}, -+ {VKD3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"}, -+ {VKD3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"}, -+ {VKD3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"}, -+ {VKD3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"}, -+ {VKD3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"}, -+ {VKD3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"}, -+ {VKD3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"}, -+ {VKD3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"}, -+ {VKD3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"}, -+ {VKD3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"}, -+ }; -+ - for (i = 0; i < ARRAY_SIZE(shader_input_sysval_semantic_names); ++i) - { -- if 
(shader_input_sysval_semantic_names[i].sysval_semantic == semantic) -- { -- vkd3d_string_buffer_printf(&compiler->buffer, "%s", shader_input_sysval_semantic_names[i].sysval_name); -- return; -- } -+ if (shader_input_sysval_semantic_names[i].sysval_semantic != semantic) -+ continue; -+ -+ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s", -+ prefix, shader_input_sysval_semantic_names[i].sysval_name, suffix); -+ return; - } - -- vkd3d_string_buffer_printf(&compiler->buffer, "unknown_shader_input_sysval_semantic(%#x)", semantic); -+ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, semantic, compiler->colours.reset, suffix); - } - - static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, enum vkd3d_shader_resource_type type) -@@ -646,6 +685,8 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum - [VKD3D_DATA_UINT8 ] = "uint8", - [VKD3D_DATA_UINT64 ] = "uint64", - [VKD3D_DATA_BOOL ] = "bool", -+ [VKD3D_DATA_UINT16 ] = "uint16", -+ [VKD3D_DATA_HALF ] = "half", - }; - - const char *name; -@@ -673,128 +714,133 @@ static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compil - vkd3d_string_buffer_printf(&compiler->buffer, ")"); - } - --static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_semantic *semantic, uint32_t flags) -+static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_semantic *semantic, uint32_t flags, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ unsigned int usage_idx; -+ const char *usage; -+ bool indexed; - - if (semantic->resource.reg.reg.type == VKD3DSPR_COMBINED_SAMPLER) - { - switch (semantic->resource_type) - { - case VKD3D_SHADER_RESOURCE_TEXTURE_2D: -- shader_addline(buffer, "_2d"); -+ usage = "2d"; - break; -- - case VKD3D_SHADER_RESOURCE_TEXTURE_3D: -- shader_addline(buffer, "_volume"); -+ usage = "volume"; - break; -- - case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: -- shader_addline(buffer, "_cube"); -+ usage = "cube"; - break; -- - default: -- shader_addline(buffer, "_unknown_resource_type(%#x)", semantic->resource_type); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, semantic->resource_type, compiler->colours.reset, suffix); -+ return; - } -+ -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); -+ return; - } -- else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) -+ -+ if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) - { -+ vkd3d_string_buffer_printf(buffer, "%s", prefix); - if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE) -- shader_addline(buffer, "_resource"); -+ vkd3d_string_buffer_printf(buffer, "resource_"); - -- shader_addline(buffer, "_"); - shader_dump_resource_type(compiler, semantic->resource_type); - if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS - || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) - { -- shader_addline(buffer, "(%u)", semantic->sample_count); -+ vkd3d_string_buffer_printf(buffer, "(%u)", semantic->sample_count); - } - if (semantic->resource.reg.reg.type == VKD3DSPR_UAV) - shader_dump_uav_flags(compiler, flags); -- shader_addline(buffer, " "); -+ vkd3d_string_buffer_printf(buffer, " "); - shader_dump_resource_data_type(compiler, 
semantic->resource_data_type); -+ vkd3d_string_buffer_printf(buffer, "%s", suffix); -+ return; - } -- else -- { -- /* Pixel shaders 3.0 don't have usage semantics. */ -- if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) -- && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) -- return; -- else -- shader_addline(buffer, "_"); -- -- switch (semantic->usage) -- { -- case VKD3D_DECL_USAGE_POSITION: -- shader_addline(buffer, "position%u", semantic->usage_idx); -- break; -- -- case VKD3D_DECL_USAGE_BLEND_INDICES: -- shader_addline(buffer, "blend"); -- break; - -- case VKD3D_DECL_USAGE_BLEND_WEIGHT: -- shader_addline(buffer, "weight"); -- break; -- -- case VKD3D_DECL_USAGE_NORMAL: -- shader_addline(buffer, "normal%u", semantic->usage_idx); -- break; -- -- case VKD3D_DECL_USAGE_PSIZE: -- shader_addline(buffer, "psize"); -- break; -- -- case VKD3D_DECL_USAGE_COLOR: -- if (!semantic->usage_idx) -- shader_addline(buffer, "color"); -- else -- shader_addline(buffer, "specular%u", (semantic->usage_idx - 1)); -- break; -- -- case VKD3D_DECL_USAGE_TEXCOORD: -- shader_addline(buffer, "texcoord%u", semantic->usage_idx); -- break; -- -- case VKD3D_DECL_USAGE_TANGENT: -- shader_addline(buffer, "tangent"); -- break; -- -- case VKD3D_DECL_USAGE_BINORMAL: -- shader_addline(buffer, "binormal"); -- break; -- -- case VKD3D_DECL_USAGE_TESS_FACTOR: -- shader_addline(buffer, "tessfactor"); -- break; -- -- case VKD3D_DECL_USAGE_POSITIONT: -- shader_addline(buffer, "positionT%u", semantic->usage_idx); -- break; -- -- case VKD3D_DECL_USAGE_FOG: -- shader_addline(buffer, "fog"); -- break; -- -- case VKD3D_DECL_USAGE_DEPTH: -- shader_addline(buffer, "depth"); -- break; -+ /* Pixel shaders 3.0 don't have usage semantics. */ -+ if (!vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0) -+ && compiler->shader_version.type == VKD3D_SHADER_TYPE_PIXEL) -+ return; - -- case VKD3D_DECL_USAGE_SAMPLE: -- shader_addline(buffer, "sample"); -+ indexed = false; -+ usage_idx = semantic->usage_idx; -+ switch (semantic->usage) -+ { -+ case VKD3D_DECL_USAGE_POSITION: -+ usage = "position"; -+ indexed = true; -+ break; -+ case VKD3D_DECL_USAGE_BLEND_INDICES: -+ usage = "blend"; -+ break; -+ case VKD3D_DECL_USAGE_BLEND_WEIGHT: -+ usage = "weight"; -+ break; -+ case VKD3D_DECL_USAGE_NORMAL: -+ usage = "normal"; -+ indexed = true; -+ break; -+ case VKD3D_DECL_USAGE_PSIZE: -+ usage = "psize"; -+ break; -+ case VKD3D_DECL_USAGE_COLOR: -+ if (semantic->usage_idx) -+ { -+ usage = "specular"; -+ indexed = true; -+ --usage_idx; - break; -- -- default: -- shader_addline(buffer, "", semantic->usage); -- FIXME("Unrecognised semantic usage %#x.\n", semantic->usage); -- } -+ } -+ usage = "color"; -+ break; -+ case VKD3D_DECL_USAGE_TEXCOORD: -+ usage = "texcoord"; -+ indexed = true; -+ break; -+ case VKD3D_DECL_USAGE_TANGENT: -+ usage = "tangent"; -+ break; -+ case VKD3D_DECL_USAGE_BINORMAL: -+ usage = "binormal"; -+ break; -+ case VKD3D_DECL_USAGE_TESS_FACTOR: -+ usage = "tessfactor"; -+ break; -+ case VKD3D_DECL_USAGE_POSITIONT: -+ usage = "positionT"; -+ indexed = true; -+ break; -+ case VKD3D_DECL_USAGE_FOG: -+ usage = "fog"; -+ break; -+ case VKD3D_DECL_USAGE_DEPTH: -+ usage = "depth"; -+ break; -+ case VKD3D_DECL_USAGE_SAMPLE: -+ usage = "sample"; -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, semantic->usage, usage_idx, compiler->colours.reset, suffix); -+ return; - } -+ -+ if (indexed) -+ vkd3d_string_buffer_printf(buffer, "%s%s%u%s", prefix, usage, usage_idx, suffix); -+ 
else -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, usage, suffix); - } - --static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_src_param *param); -+static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_src_param *param, const char *suffix); - - static void shader_print_float_literal(struct vkd3d_d3d_asm_compiler *compiler, - const char *prefix, float f, const char *suffix) -@@ -891,13 +937,9 @@ static void shader_print_untyped_literal(struct vkd3d_d3d_asm_compiler *compiler - static void shader_print_subscript(struct vkd3d_d3d_asm_compiler *compiler, - unsigned int offset, const struct vkd3d_shader_src_param *rel_addr) - { -- vkd3d_string_buffer_printf(&compiler->buffer, "["); - if (rel_addr) -- { -- shader_dump_src_param(compiler, rel_addr); -- vkd3d_string_buffer_printf(&compiler->buffer, " + "); -- } -- shader_print_uint_literal(compiler, "", offset, "]"); -+ shader_print_src_param(compiler, "[", rel_addr, " + "); -+ shader_print_uint_literal(compiler, rel_addr ? "" : "[", offset, "]"); - } - - static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler, -@@ -910,8 +952,8 @@ static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler - vkd3d_string_buffer_printf(&compiler->buffer, "*]"); - } - --static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg, -- bool is_declaration) -+static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, -+ const struct vkd3d_shader_register *reg, bool is_declaration, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; - unsigned int offset = reg->idx[0].offset; -@@ -920,22 +962,23 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"}; - static const char * const misctype_reg_names[] = {"vPos", "vFace"}; - -- shader_addline(buffer, "%s", compiler->colours.reg); -+ vkd3d_string_buffer_printf(buffer, "%s%s", prefix, -+ reg->type == VKD3DSPR_LABEL ? 
compiler->colours.label : compiler->colours.reg); - switch (reg->type) - { - case VKD3DSPR_TEMP: -- shader_addline(buffer, "r"); -+ vkd3d_string_buffer_printf(buffer, "r"); - break; - - case VKD3DSPR_INPUT: -- shader_addline(buffer, "v"); -+ vkd3d_string_buffer_printf(buffer, "v"); - break; - - case VKD3DSPR_CONST: - case VKD3DSPR_CONST2: - case VKD3DSPR_CONST3: - case VKD3DSPR_CONST4: -- shader_addline(buffer, "c"); -+ vkd3d_string_buffer_printf(buffer, "c"); - offset = shader_get_float_offset(reg->type, offset); - break; - -@@ -945,205 +988,210 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - break; - - case VKD3DSPR_RASTOUT: -- shader_addline(buffer, "%s", rastout_reg_names[offset]); -+ vkd3d_string_buffer_printf(buffer, "%s", rastout_reg_names[offset]); - break; - - case VKD3DSPR_COLOROUT: -- shader_addline(buffer, "oC"); -+ vkd3d_string_buffer_printf(buffer, "oC"); - break; - - case VKD3DSPR_DEPTHOUT: -- shader_addline(buffer, "oDepth"); -+ vkd3d_string_buffer_printf(buffer, "oDepth"); - break; - - case VKD3DSPR_DEPTHOUTGE: -- shader_addline(buffer, "oDepthGE"); -+ vkd3d_string_buffer_printf(buffer, "oDepthGE"); - break; - - case VKD3DSPR_DEPTHOUTLE: -- shader_addline(buffer, "oDepthLE"); -+ vkd3d_string_buffer_printf(buffer, "oDepthLE"); - break; - - case VKD3DSPR_ATTROUT: -- shader_addline(buffer, "oD"); -+ vkd3d_string_buffer_printf(buffer, "oD"); - break; - - case VKD3DSPR_TEXCRDOUT: - /* Vertex shaders >= 3.0 use general purpose output registers - * (VKD3DSPR_OUTPUT), which can include an address token. */ - if (vkd3d_shader_ver_ge(&compiler->shader_version, 3, 0)) -- shader_addline(buffer, "o"); -+ vkd3d_string_buffer_printf(buffer, "o"); - else -- shader_addline(buffer, "oT"); -+ vkd3d_string_buffer_printf(buffer, "oT"); - break; - - case VKD3DSPR_CONSTINT: -- shader_addline(buffer, "i"); -+ vkd3d_string_buffer_printf(buffer, "i"); - break; - - case VKD3DSPR_CONSTBOOL: -- shader_addline(buffer, "b"); -+ vkd3d_string_buffer_printf(buffer, "b"); - break; - - case VKD3DSPR_LABEL: -- shader_addline(buffer, "l"); -+ vkd3d_string_buffer_printf(buffer, "l"); - break; - - case VKD3DSPR_LOOP: -- shader_addline(buffer, "aL"); -+ vkd3d_string_buffer_printf(buffer, "aL"); - break; - - case VKD3DSPR_COMBINED_SAMPLER: - case VKD3DSPR_SAMPLER: -- shader_addline(buffer, "s"); -+ vkd3d_string_buffer_printf(buffer, "s"); - is_descriptor = true; - break; - - case VKD3DSPR_MISCTYPE: - if (offset > 1) -- { -- FIXME("Unhandled misctype register %u.\n", offset); -- shader_addline(buffer, "", offset); -- } -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, offset, compiler->colours.reset); - else -- { -- shader_addline(buffer, "%s", misctype_reg_names[offset]); -- } -+ vkd3d_string_buffer_printf(buffer, "%s", misctype_reg_names[offset]); - break; - - case VKD3DSPR_PREDICATE: -- shader_addline(buffer, "p"); -+ vkd3d_string_buffer_printf(buffer, "p"); - break; - - case VKD3DSPR_IMMCONST: -- shader_addline(buffer, "l"); -+ vkd3d_string_buffer_printf(buffer, "l"); - break; - - case VKD3DSPR_IMMCONST64: -- shader_addline(buffer, "d"); -+ vkd3d_string_buffer_printf(buffer, "d"); - break; - - case VKD3DSPR_CONSTBUFFER: -- shader_addline(buffer, "cb"); -+ vkd3d_string_buffer_printf(buffer, "cb"); - is_descriptor = true; - break; - - case VKD3DSPR_IMMCONSTBUFFER: -- shader_addline(buffer, "icb"); -+ vkd3d_string_buffer_printf(buffer, "icb"); - break; - - case VKD3DSPR_PRIMID: -- shader_addline(buffer, "primID"); -+ vkd3d_string_buffer_printf(buffer, 
"primID"); - break; - - case VKD3DSPR_NULL: -- shader_addline(buffer, "null"); -+ vkd3d_string_buffer_printf(buffer, "null"); - break; - - case VKD3DSPR_RASTERIZER: -- shader_addline(buffer, "rasterizer"); -+ vkd3d_string_buffer_printf(buffer, "rasterizer"); - break; - - case VKD3DSPR_RESOURCE: -- shader_addline(buffer, "t"); -+ vkd3d_string_buffer_printf(buffer, "t"); - is_descriptor = true; - break; - - case VKD3DSPR_UAV: -- shader_addline(buffer, "u"); -+ vkd3d_string_buffer_printf(buffer, "u"); - is_descriptor = true; - break; - - case VKD3DSPR_OUTPOINTID: -- shader_addline(buffer, "vOutputControlPointID"); -+ vkd3d_string_buffer_printf(buffer, "vOutputControlPointID"); - break; - - case VKD3DSPR_FORKINSTID: -- shader_addline(buffer, "vForkInstanceId"); -+ vkd3d_string_buffer_printf(buffer, "vForkInstanceId"); - break; - - case VKD3DSPR_JOININSTID: -- shader_addline(buffer, "vJoinInstanceId"); -+ vkd3d_string_buffer_printf(buffer, "vJoinInstanceId"); - break; - - case VKD3DSPR_INCONTROLPOINT: -- shader_addline(buffer, "vicp"); -+ vkd3d_string_buffer_printf(buffer, "vicp"); - break; - - case VKD3DSPR_OUTCONTROLPOINT: -- shader_addline(buffer, "vocp"); -+ vkd3d_string_buffer_printf(buffer, "vocp"); - break; - - case VKD3DSPR_PATCHCONST: -- shader_addline(buffer, "vpc"); -+ vkd3d_string_buffer_printf(buffer, "vpc"); - break; - - case VKD3DSPR_TESSCOORD: -- shader_addline(buffer, "vDomainLocation"); -+ vkd3d_string_buffer_printf(buffer, "vDomainLocation"); - break; - - case VKD3DSPR_GROUPSHAREDMEM: -- shader_addline(buffer, "g"); -+ vkd3d_string_buffer_printf(buffer, "g"); - break; - - case VKD3DSPR_THREADID: -- shader_addline(buffer, "vThreadID"); -+ vkd3d_string_buffer_printf(buffer, "vThreadID"); - break; - - case VKD3DSPR_THREADGROUPID: -- shader_addline(buffer, "vThreadGroupID"); -+ vkd3d_string_buffer_printf(buffer, "vThreadGroupID"); - break; - - case VKD3DSPR_LOCALTHREADID: -- shader_addline(buffer, "vThreadIDInGroup"); -+ vkd3d_string_buffer_printf(buffer, "vThreadIDInGroup"); - break; - - case VKD3DSPR_LOCALTHREADINDEX: -- shader_addline(buffer, "vThreadIDInGroupFlattened"); -+ vkd3d_string_buffer_printf(buffer, "vThreadIDInGroupFlattened"); - break; - - case VKD3DSPR_IDXTEMP: -- shader_addline(buffer, "x"); -+ vkd3d_string_buffer_printf(buffer, "x"); - break; - - case VKD3DSPR_STREAM: -- shader_addline(buffer, "m"); -+ vkd3d_string_buffer_printf(buffer, "m"); - break; - - case VKD3DSPR_FUNCTIONBODY: -- shader_addline(buffer, "fb"); -+ vkd3d_string_buffer_printf(buffer, "fb"); - break; - - case VKD3DSPR_FUNCTIONPOINTER: -- shader_addline(buffer, "fp"); -+ vkd3d_string_buffer_printf(buffer, "fp"); - break; - - case VKD3DSPR_COVERAGE: -- shader_addline(buffer, "vCoverage"); -+ vkd3d_string_buffer_printf(buffer, "vCoverage"); - break; - - case VKD3DSPR_SAMPLEMASK: -- shader_addline(buffer, "oMask"); -+ vkd3d_string_buffer_printf(buffer, "oMask"); - break; - - case VKD3DSPR_GSINSTID: -- shader_addline(buffer, "vGSInstanceID"); -+ vkd3d_string_buffer_printf(buffer, "vGSInstanceID"); - break; - - case VKD3DSPR_OUTSTENCILREF: -- shader_addline(buffer, "oStencilRef"); -+ vkd3d_string_buffer_printf(buffer, "oStencilRef"); - break; - - case VKD3DSPR_UNDEF: -- shader_addline(buffer, "undef"); -+ vkd3d_string_buffer_printf(buffer, "undef"); - break; - - case VKD3DSPR_SSA: -- shader_addline(buffer, "sr"); -+ vkd3d_string_buffer_printf(buffer, "sr"); -+ break; -+ -+ case VKD3DSPR_WAVELANECOUNT: -+ vkd3d_string_buffer_printf(buffer, "vWaveLaneCount"); -+ break; -+ -+ case 
VKD3DSPR_WAVELANEINDEX: -+ vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); - break; - - default: -- shader_addline(buffer, "", reg->type); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->type, compiler->colours.reset); - break; - } - -@@ -1162,7 +1210,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - break; - } - -- shader_addline(buffer, "%s(", compiler->colours.reset); -+ vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); - switch (reg->dimension) - { - case VSIR_DIMENSION_SCALAR: -@@ -1183,7 +1231,8 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); - break; - default: -- shader_addline(buffer, "", reg->data_type); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->data_type, compiler->colours.reset); - break; - } - break; -@@ -1222,20 +1271,22 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[3], ""); - break; - default: -- shader_addline(buffer, "", reg->data_type); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->data_type, compiler->colours.reset); - break; - } - break; - - default: -- shader_addline(buffer, "", reg->dimension); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->dimension, compiler->colours.reset); - break; - } -- shader_addline(buffer, ")"); -+ vkd3d_string_buffer_printf(buffer, ")"); - } - else if (reg->type == VKD3DSPR_IMMCONST64) - { -- shader_addline(buffer, "%s(", compiler->colours.reset); -+ vkd3d_string_buffer_printf(buffer, "%s(", compiler->colours.reset); - /* A double2 vector is treated as a float4 vector in enum vsir_dimension. 
*/ - if (reg->dimension == VSIR_DIMENSION_SCALAR || reg->dimension == VSIR_DIMENSION_VEC4) - { -@@ -1253,14 +1304,16 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - } - else - { -- shader_addline(buffer, "", reg->data_type); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->data_type, compiler->colours.reset); - } - } - else - { -- shader_addline(buffer, "", reg->dimension); -+ vkd3d_string_buffer_printf(buffer, "%s%s", -+ compiler->colours.error, reg->dimension, compiler->colours.reset); - } -- shader_addline(buffer, ")"); -+ vkd3d_string_buffer_printf(buffer, ")"); - } - else if (reg->type != VKD3DSPR_RASTOUT - && reg->type != VKD3DSPR_MISCTYPE -@@ -1304,7 +1357,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - } - else - { -- shader_addline(buffer, "%s", compiler->colours.reset); -+ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); - } - - if (reg->type == VKD3DSPR_FUNCTIONPOINTER) -@@ -1312,8 +1365,9 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const - } - else - { -- shader_addline(buffer, "%s", compiler->colours.reset); -+ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); - } -+ vkd3d_string_buffer_printf(buffer, "%s", suffix); - } - - static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg) -@@ -1357,8 +1411,8 @@ static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, co - compiler->colours.modifier, compiler->colours.reset); - } - --static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_register *reg) -+static void shader_print_reg_type(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_register *reg, const char *suffix) - { - static const char *dimensions[] = - { -@@ -1370,7 +1424,13 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, - struct vkd3d_string_buffer *buffer = &compiler->buffer; - const char *dimension; - -- if (compiler->dialect != VSIR_ASM_VSIR) -+ if (!(compiler->flags & VSIR_ASM_FLAG_DUMP_TYPES)) -+ { -+ vkd3d_string_buffer_printf(buffer, "%s%s", prefix, suffix); -+ return; -+ } -+ -+ if (reg->data_type == VKD3D_DATA_UNUSED) - return; - - if (reg->dimension < ARRAY_SIZE(dimensions)) -@@ -1378,83 +1438,114 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, - else - dimension = "??"; - -- shader_addline(buffer, " <%s", dimension); -+ vkd3d_string_buffer_printf(buffer, "%s <%s", prefix, dimension); - shader_dump_data_type(compiler, reg->data_type); -- shader_addline(buffer, ">"); -+ vkd3d_string_buffer_printf(buffer, ">%s", suffix); - } - --static void shader_dump_dst_param(struct vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_dst_param *param, bool is_declaration) -+static void shader_print_write_mask(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, uint32_t mask, const char *suffix) -+{ -+ unsigned int i = 0; -+ char buffer[5]; -+ -+ if (mask == 0) -+ { -+ vkd3d_string_buffer_printf(&compiler->buffer, "%s%s", prefix, suffix); -+ return; -+ } -+ -+ if (mask & VKD3DSP_WRITEMASK_0) -+ buffer[i++] = 'x'; -+ if (mask & VKD3DSP_WRITEMASK_1) -+ buffer[i++] = 'y'; -+ if (mask & VKD3DSP_WRITEMASK_2) -+ buffer[i++] = 'z'; -+ if (mask & VKD3DSP_WRITEMASK_3) -+ buffer[i++] = 'w'; -+ buffer[i++] = '\0'; -+ -+ vkd3d_string_buffer_printf(&compiler->buffer, 
"%s.%s%s%s%s", prefix, -+ compiler->colours.write_mask, buffer, compiler->colours.reset, suffix); -+} -+ -+static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_dst_param *param, bool is_declaration, const char *suffix) - { -- struct vkd3d_string_buffer *buffer = &compiler->buffer; - uint32_t write_mask = param->write_mask; - -- shader_dump_register(compiler, ¶m->reg, is_declaration); -+ shader_print_register(compiler, prefix, ¶m->reg, is_declaration, ""); - - if (write_mask && param->reg.dimension == VSIR_DIMENSION_VEC4) - { -- static const char write_mask_chars[] = "xyzw"; -- - if (data_type_is_64_bit(param->reg.data_type)) - write_mask = vsir_write_mask_32_from_64(write_mask); - -- shader_addline(buffer, ".%s", compiler->colours.write_mask); -- if (write_mask & VKD3DSP_WRITEMASK_0) -- shader_addline(buffer, "%c", write_mask_chars[0]); -- if (write_mask & VKD3DSP_WRITEMASK_1) -- shader_addline(buffer, "%c", write_mask_chars[1]); -- if (write_mask & VKD3DSP_WRITEMASK_2) -- shader_addline(buffer, "%c", write_mask_chars[2]); -- if (write_mask & VKD3DSP_WRITEMASK_3) -- shader_addline(buffer, "%c", write_mask_chars[3]); -- shader_addline(buffer, "%s", compiler->colours.reset); -+ shader_print_write_mask(compiler, "", write_mask, ""); - } - - shader_print_precision(compiler, ¶m->reg); - shader_print_non_uniform(compiler, ¶m->reg); -- shader_dump_reg_type(compiler, ¶m->reg); -+ shader_print_reg_type(compiler, "", ¶m->reg, suffix); - } - --static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_src_param *param) -+static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_src_param *param, const char *suffix) - { - enum vkd3d_shader_src_modifier src_modifier = param->modifiers; - struct vkd3d_string_buffer *buffer = &compiler->buffer; - uint32_t swizzle = param->swizzle; -+ const char *modifier = ""; -+ bool is_abs = false; - - if (src_modifier == VKD3DSPSM_NEG - || src_modifier == VKD3DSPSM_BIASNEG - || src_modifier == VKD3DSPSM_SIGNNEG - || src_modifier == VKD3DSPSM_X2NEG - || src_modifier == VKD3DSPSM_ABSNEG) -- shader_addline(buffer, "-"); -+ modifier = "-"; - else if (src_modifier == VKD3DSPSM_COMP) -- shader_addline(buffer, "1-"); -+ modifier = "1-"; - else if (src_modifier == VKD3DSPSM_NOT) -- shader_addline(buffer, "!"); -+ modifier = "!"; -+ vkd3d_string_buffer_printf(buffer, "%s%s", prefix, modifier); - - if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) -- shader_addline(buffer, "|"); -+ is_abs = true; - -- shader_dump_register(compiler, ¶m->reg, false); -+ shader_print_register(compiler, is_abs ? 
"|" : "", ¶m->reg, false, ""); - - switch (src_modifier) - { -- case VKD3DSPSM_NONE: break; -- case VKD3DSPSM_NEG: break; -- case VKD3DSPSM_NOT: break; -- case VKD3DSPSM_BIAS: shader_addline(buffer, "_bias"); break; -- case VKD3DSPSM_BIASNEG: shader_addline(buffer, "_bias"); break; -- case VKD3DSPSM_SIGN: shader_addline(buffer, "_bx2"); break; -- case VKD3DSPSM_SIGNNEG: shader_addline(buffer, "_bx2"); break; -- case VKD3DSPSM_COMP: break; -- case VKD3DSPSM_X2: shader_addline(buffer, "_x2"); break; -- case VKD3DSPSM_X2NEG: shader_addline(buffer, "_x2"); break; -- case VKD3DSPSM_DZ: shader_addline(buffer, "_dz"); break; -- case VKD3DSPSM_DW: shader_addline(buffer, "_dw"); break; -+ case VKD3DSPSM_NONE: -+ case VKD3DSPSM_NEG: -+ case VKD3DSPSM_COMP: -+ case VKD3DSPSM_ABS: - case VKD3DSPSM_ABSNEG: -- case VKD3DSPSM_ABS: /* handled later */ break; -- default: shader_addline(buffer, "_unknown_modifier(%#x)", src_modifier); -+ case VKD3DSPSM_NOT: -+ break; -+ case VKD3DSPSM_BIAS: -+ case VKD3DSPSM_BIASNEG: -+ vkd3d_string_buffer_printf(buffer, "_bias"); -+ break; -+ case VKD3DSPSM_SIGN: -+ case VKD3DSPSM_SIGNNEG: -+ vkd3d_string_buffer_printf(buffer, "_bx2"); -+ break; -+ case VKD3DSPSM_X2: -+ case VKD3DSPSM_X2NEG: -+ vkd3d_string_buffer_printf(buffer, "_x2"); -+ break; -+ case VKD3DSPSM_DZ: -+ vkd3d_string_buffer_printf(buffer, "_dz"); -+ break; -+ case VKD3DSPSM_DW: -+ vkd3d_string_buffer_printf(buffer, "_dw"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_%s%s", -+ compiler->colours.error, src_modifier, compiler->colours.reset); -+ break; - } - - if (param->reg.type != VKD3DSPR_IMMCONST && param->reg.type != VKD3DSPR_IMMCONST64 -@@ -1472,26 +1563,21 @@ static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, - swizzle_z = vsir_swizzle_get_component(swizzle, 2); - swizzle_w = vsir_swizzle_get_component(swizzle, 3); - -- if (swizzle_x == swizzle_y -- && swizzle_x == swizzle_z -- && swizzle_x == swizzle_w) -- { -- shader_addline(buffer, ".%s%c%s", compiler->colours.swizzle, -+ if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) -+ vkd3d_string_buffer_printf(buffer, ".%s%c%s", compiler->colours.swizzle, - swizzle_chars[swizzle_x], compiler->colours.reset); -- } - else -- { -- shader_addline(buffer, ".%s%c%c%c%c%s", compiler->colours.swizzle, -+ vkd3d_string_buffer_printf(buffer, ".%s%c%c%c%c%s", compiler->colours.swizzle, - swizzle_chars[swizzle_x], swizzle_chars[swizzle_y], - swizzle_chars[swizzle_z], swizzle_chars[swizzle_w], compiler->colours.reset); -- } - } -- if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) -- shader_addline(buffer, "|"); -+ -+ if (is_abs) -+ vkd3d_string_buffer_printf(buffer, "|"); - - shader_print_precision(compiler, ¶m->reg); - shader_print_non_uniform(compiler, ¶m->reg); -- shader_dump_reg_type(compiler, ¶m->reg); -+ shader_print_reg_type(compiler, "", ¶m->reg, suffix); - } - - static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, -@@ -1502,105 +1588,129 @@ static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, - - switch (dst->shift) - { -- case 0: break; -- case 13: shader_addline(buffer, "_d8"); break; -- case 14: shader_addline(buffer, "_d4"); break; -- case 15: shader_addline(buffer, "_d2"); break; -- case 1: shader_addline(buffer, "_x2"); break; -- case 2: shader_addline(buffer, "_x4"); break; -- case 3: shader_addline(buffer, "_x8"); break; -- default: shader_addline(buffer, "_unhandled_shift(%d)", dst->shift); break; -+ case 0: -+ 
break; -+ case 13: -+ vkd3d_string_buffer_printf(buffer, "_d8"); -+ break; -+ case 14: -+ vkd3d_string_buffer_printf(buffer, "_d4"); -+ break; -+ case 15: -+ vkd3d_string_buffer_printf(buffer, "_d2"); -+ break; -+ case 1: -+ vkd3d_string_buffer_printf(buffer, "_x2"); -+ break; -+ case 2: -+ vkd3d_string_buffer_printf(buffer, "_x4"); -+ break; -+ case 3: -+ vkd3d_string_buffer_printf(buffer, "_x8"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_unhandled_shift(%d)", dst->shift); -+ break; - } - -- if (mmask & VKD3DSPDM_SATURATE) shader_addline(buffer, "_sat"); -- if (mmask & VKD3DSPDM_PARTIALPRECISION) shader_addline(buffer, "_pp"); -- if (mmask & VKD3DSPDM_MSAMPCENTROID) shader_addline(buffer, "_centroid"); -+ if (mmask & VKD3DSPDM_SATURATE) -+ vkd3d_string_buffer_printf(buffer, "_sat"); -+ if (mmask & VKD3DSPDM_PARTIALPRECISION) -+ vkd3d_string_buffer_printf(buffer, "_pp"); -+ if (mmask & VKD3DSPDM_MSAMPCENTROID) -+ vkd3d_string_buffer_printf(buffer, "_centroid"); - - mmask &= ~VKD3DSPDM_MASK; - if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask); - } - --static void shader_dump_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, -- const struct vkd3d_shader_primitive_type *primitive_type) -+static void shader_print_primitive_type(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, const struct vkd3d_shader_primitive_type *p, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ const char *primitive_type; - -- switch (primitive_type->type) -+ switch (p->type) - { - case VKD3D_PT_UNDEFINED: -- shader_addline(buffer, "undefined"); -+ primitive_type = "undefined"; - break; - case VKD3D_PT_POINTLIST: -- shader_addline(buffer, "pointlist"); -+ primitive_type = "pointlist"; - break; - case VKD3D_PT_LINELIST: -- shader_addline(buffer, "linelist"); -+ primitive_type = "linelist"; - break; - case VKD3D_PT_LINESTRIP: -- shader_addline(buffer, "linestrip"); -+ primitive_type = "linestrip"; - break; - case VKD3D_PT_TRIANGLELIST: -- shader_addline(buffer, "trianglelist"); -+ primitive_type = "trianglelist"; - break; - case VKD3D_PT_TRIANGLESTRIP: -- shader_addline(buffer, "trianglestrip"); -+ primitive_type = "trianglestrip"; - break; - case VKD3D_PT_TRIANGLEFAN: -- shader_addline(buffer, "trianglefan"); -+ primitive_type = "trianglefan"; - break; - case VKD3D_PT_LINELIST_ADJ: -- shader_addline(buffer, "linelist_adj"); -+ primitive_type = "linelist_adj"; - break; - case VKD3D_PT_LINESTRIP_ADJ: -- shader_addline(buffer, "linestrip_adj"); -+ primitive_type = "linestrip_adj"; - break; - case VKD3D_PT_TRIANGLELIST_ADJ: -- shader_addline(buffer, "trianglelist_adj"); -+ primitive_type = "trianglelist_adj"; - break; - case VKD3D_PT_TRIANGLESTRIP_ADJ: -- shader_addline(buffer, "trianglestrip_adj"); -+ primitive_type = "trianglestrip_adj"; - break; - case VKD3D_PT_PATCH: -- shader_addline(buffer, "patch%u", primitive_type->patch_vertex_count); -- break; -+ vkd3d_string_buffer_printf(buffer, "%spatch%u%s", prefix, p->patch_vertex_count, suffix); -+ return; - default: -- shader_addline(buffer, "", primitive_type->type); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, p->type, compiler->colours.reset, suffix); -+ return; - } -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive_type, suffix); - } - --static void shader_dump_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_shader_interpolation_mode interpolation_mode) -+static void 
shader_print_interpolation_mode(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *prefix, enum vkd3d_shader_interpolation_mode m, const char *suffix) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ const char *mode; - -- switch (interpolation_mode) -+ switch (m) - { - case VKD3DSIM_CONSTANT: -- shader_addline(buffer, "constant"); -+ mode = "constant"; - break; - case VKD3DSIM_LINEAR: -- shader_addline(buffer, "linear"); -+ mode = "linear"; - break; - case VKD3DSIM_LINEAR_CENTROID: -- shader_addline(buffer, "linear centroid"); -+ mode = "linear centroid"; - break; - case VKD3DSIM_LINEAR_NOPERSPECTIVE: -- shader_addline(buffer, "linear noperspective"); -+ mode = "linear noperspective"; - break; - case VKD3DSIM_LINEAR_SAMPLE: -- shader_addline(buffer, "linear sample"); -+ mode = "linear sample"; - break; - case VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID: -- shader_addline(buffer, "linear noperspective centroid"); -+ mode = "linear noperspective centroid"; - break; - case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: -- shader_addline(buffer, "linear noperspective sample"); -+ mode = "linear noperspective sample"; - break; - default: -- shader_addline(buffer, "", interpolation_mode); -- break; -+ vkd3d_string_buffer_printf(buffer, "%s%s%s%s", -+ prefix, compiler->colours.error, m, compiler->colours.reset, suffix); -+ return; - } -+ vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, mode, suffix); - } - - const char *shader_get_type_prefix(enum vkd3d_shader_type type) -@@ -1654,9 +1764,15 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - case VKD3DSIH_RETP: - switch (ins->flags) - { -- case VKD3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(buffer, "_nz"); break; -- case VKD3D_SHADER_CONDITIONAL_OP_Z: shader_addline(buffer, "_z"); break; -- default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); break; -+ case VKD3D_SHADER_CONDITIONAL_OP_NZ: -+ vkd3d_string_buffer_printf(buffer, "_nz"); -+ break; -+ case VKD3D_SHADER_CONDITIONAL_OP_Z: -+ vkd3d_string_buffer_printf(buffer, "_z"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); -+ break; - } - break; - -@@ -1664,49 +1780,99 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - case VKD3DSIH_BREAKC: - switch (ins->flags) - { -- case VKD3D_SHADER_REL_OP_GT: shader_addline(buffer, "_gt"); break; -- case VKD3D_SHADER_REL_OP_EQ: shader_addline(buffer, "_eq"); break; -- case VKD3D_SHADER_REL_OP_GE: shader_addline(buffer, "_ge"); break; -- case VKD3D_SHADER_REL_OP_LT: shader_addline(buffer, "_lt"); break; -- case VKD3D_SHADER_REL_OP_NE: shader_addline(buffer, "_ne"); break; -- case VKD3D_SHADER_REL_OP_LE: shader_addline(buffer, "_le"); break; -- default: shader_addline(buffer, "_(%u)", ins->flags); -+ case VKD3D_SHADER_REL_OP_GT: -+ vkd3d_string_buffer_printf(buffer, "_gt"); -+ break; -+ case VKD3D_SHADER_REL_OP_EQ: -+ vkd3d_string_buffer_printf(buffer, "_eq"); -+ break; -+ case VKD3D_SHADER_REL_OP_GE: -+ vkd3d_string_buffer_printf(buffer, "_ge"); -+ break; -+ case VKD3D_SHADER_REL_OP_LT: -+ vkd3d_string_buffer_printf(buffer, "_lt"); -+ break; -+ case VKD3D_SHADER_REL_OP_NE: -+ vkd3d_string_buffer_printf(buffer, "_ne"); -+ break; -+ case VKD3D_SHADER_REL_OP_LE: -+ vkd3d_string_buffer_printf(buffer, "_le"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_(%u)", ins->flags); -+ break; - } - break; - - case VKD3DSIH_RESINFO: - switch (ins->flags) - { -- case VKD3DSI_NONE: break; -- case 
VKD3DSI_RESINFO_RCP_FLOAT: shader_addline(buffer, "_rcpFloat"); break; -- case VKD3DSI_RESINFO_UINT: shader_addline(buffer, "_uint"); break; -- default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); -+ case VKD3DSI_NONE: -+ break; -+ case VKD3DSI_RESINFO_RCP_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "_rcpFloat"); -+ break; -+ case VKD3DSI_RESINFO_UINT: -+ vkd3d_string_buffer_printf(buffer, "_uint"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); -+ break; - } - break; - - case VKD3DSIH_SAMPLE_INFO: - switch (ins->flags) - { -- case VKD3DSI_NONE: break; -- case VKD3DSI_SAMPLE_INFO_UINT: shader_addline(buffer, "_uint"); break; -- default: shader_addline(buffer, "_unrecognized(%#x)", ins->flags); -+ case VKD3DSI_NONE: -+ break; -+ case VKD3DSI_SAMPLE_INFO_UINT: -+ vkd3d_string_buffer_printf(buffer, "_uint"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "_unrecognized(%#x)", ins->flags); -+ break; - } - break; - -+ case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: -+ case VKD3DSIH_IMM_ATOMIC_IADD: -+ case VKD3DSIH_IMM_ATOMIC_AND: -+ case VKD3DSIH_IMM_ATOMIC_IMAX: -+ case VKD3DSIH_IMM_ATOMIC_IMIN: -+ case VKD3DSIH_IMM_ATOMIC_OR: -+ case VKD3DSIH_IMM_ATOMIC_UMAX: -+ case VKD3DSIH_IMM_ATOMIC_UMIN: -+ case VKD3DSIH_IMM_ATOMIC_EXCH: -+ case VKD3DSIH_IMM_ATOMIC_XOR: -+ shader_dump_atomic_op_flags(compiler, ins->flags); -+ break; -+ - case VKD3DSIH_SYNC: - shader_dump_sync_flags(compiler, ins->flags); - break; - - case VKD3DSIH_TEX: - if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) -- shader_addline(buffer, "p"); -+ vkd3d_string_buffer_printf(buffer, "p"); -+ break; -+ -+ case VKD3DSIH_WAVE_OP_ADD: -+ case VKD3DSIH_WAVE_OP_IMAX: -+ case VKD3DSIH_WAVE_OP_IMIN: -+ case VKD3DSIH_WAVE_OP_MAX: -+ case VKD3DSIH_WAVE_OP_MIN: -+ case VKD3DSIH_WAVE_OP_MUL: -+ case VKD3DSIH_WAVE_OP_UMAX: -+ case VKD3DSIH_WAVE_OP_UMIN: -+ vkd3d_string_buffer_printf(&compiler->buffer, (ins->flags & VKD3DSI_WAVE_PREFIX) ? 
"_prefix" : "_active"); - break; - - case VKD3DSIH_ISHL: - case VKD3DSIH_ISHR: - case VKD3DSIH_USHR: - if (ins->flags & VKD3DSI_SHIFT_UNMASKED) -- shader_addline(buffer, "_unmasked"); -+ vkd3d_string_buffer_printf(buffer, "_unmasked"); - /* fall through */ - default: - shader_dump_precise_flags(compiler, ins->flags); -@@ -1753,7 +1919,7 @@ static void shader_dump_icb(struct vkd3d_d3d_asm_compiler *compiler, - shader_print_hex_literal(compiler, ", ", icb->data[4 * i + 3], "},\n"); - } - } -- shader_addline(buffer, "}"); -+ vkd3d_string_buffer_printf(buffer, "}"); - } - - static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, -@@ -1765,11 +1931,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - compiler->current = ins; - - if (ins->predicate) -- { -- vkd3d_string_buffer_printf(buffer, "("); -- shader_dump_src_param(compiler, ins->predicate); -- vkd3d_string_buffer_printf(buffer, ") "); -- } -+ shader_print_src_param(compiler, "(", ins->predicate, ") "); - - /* PixWin marks instructions with the coissue flag with a '+' */ - if (ins->coissue) -@@ -1782,21 +1944,20 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_UAV_TYPED: - vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.opcode); -- shader_dump_decl_usage(compiler, &ins->declaration.semantic, ins->flags); -+ shader_print_dcl_usage(compiler, "_", &ins->declaration.semantic, ins->flags, ""); - shader_dump_ins_modifiers(compiler, &ins->declaration.semantic.resource.reg); -- vkd3d_string_buffer_printf(buffer, "%s ", compiler->colours.reset); -- shader_dump_register(compiler, &ins->declaration.semantic.resource.reg.reg, true); -+ vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.reset); -+ shader_print_register(compiler, " ", &ins->declaration.semantic.resource.reg.reg, true, ""); - shader_dump_register_space(compiler, ins->declaration.semantic.resource.range.space); - break; - - case VKD3DSIH_DCL_CONSTANT_BUFFER: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_register(compiler, &ins->declaration.cb.src.reg, true); -+ shader_print_register(compiler, " ", &ins->declaration.cb.src.reg, true, ""); - if (vkd3d_shader_ver_ge(&compiler->shader_version, 6, 0)) - shader_print_subscript(compiler, ins->declaration.cb.size, NULL); - else if (vkd3d_shader_ver_ge(&compiler->shader_version, 5, 1)) - shader_print_subscript(compiler, ins->declaration.cb.size / VKD3D_VEC4_SIZE / sizeof(float), NULL); -- shader_addline(buffer, ", %s", -+ vkd3d_string_buffer_printf(buffer, ", %s", - ins->flags & VKD3DSI_INDEXED_DYNAMIC ? 
"dynamicIndexed" : "immediateIndexed"); - shader_dump_register_space(compiler, ins->declaration.cb.range.space); - break; -@@ -1823,8 +1984,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3DSIH_DCL_INDEX_RANGE: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.index_range.dst, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.index_range.dst, true, ""); - shader_print_uint_literal(compiler, " ", ins->declaration.index_range.register_count, ""); - break; - -@@ -1840,41 +2000,32 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3DSIH_DCL_INPUT_PS: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_interpolation_mode(compiler, ins->flags); -- shader_addline(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.dst, true); -+ shader_print_interpolation_mode(compiler, " ", ins->flags, ""); -+ shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); - break; - - case VKD3DSIH_DCL_INPUT_PS_SGV: - case VKD3DSIH_DCL_INPUT_SGV: - case VKD3DSIH_DCL_INPUT_SIV: - case VKD3DSIH_DCL_OUTPUT_SIV: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); -- shader_addline(buffer, ", "); -- shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); -+ shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); -+ shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); - break; - - case VKD3DSIH_DCL_INPUT_PS_SIV: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_interpolation_mode(compiler, ins->flags); -- shader_addline(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); -- shader_addline(buffer, ", "); -- shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); -+ shader_print_interpolation_mode(compiler, " ", ins->flags, ""); -+ shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); -+ shader_print_input_sysval_semantic(compiler, ", ", ins->declaration.register_semantic.sysval_semantic, ""); - break; - - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.dst, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); - break; - - case VKD3DSIH_DCL_INPUT_PRIMITIVE: - case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_primitive_type(compiler, &ins->declaration.primitive_type); -+ shader_print_primitive_type(compiler, " ", &ins->declaration.primitive_type, ""); - break; - - case VKD3DSIH_DCL_INTERFACE: -@@ -1885,23 +2036,19 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3DSIH_DCL_RESOURCE_RAW: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.raw_resource.resource.reg, true, ""); - shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); - break; - - case VKD3DSIH_DCL_RESOURCE_STRUCTURED: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, 
&ins->declaration.structured_resource.resource.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.structured_resource.resource.reg, true, ""); - shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); - shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); - break; - - case VKD3DSIH_DCL_SAMPLER: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_register(compiler, &ins->declaration.sampler.src.reg, true); -- if (ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE) -- shader_addline(buffer, ", comparisonMode"); -+ shader_print_register(compiler, " ", &ins->declaration.sampler.src.reg, true, -+ ins->flags == VKD3DSI_SAMPLER_COMPARISON_MODE ? ", comparisonMode" : ""); - shader_dump_register_space(compiler, ins->declaration.sampler.range.space); - break; - -@@ -1916,29 +2063,24 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_tessellator_domain(compiler, ins->declaration.tessellator_domain); -+ shader_print_tessellator_domain(compiler, " ", ins->declaration.tessellator_domain, ""); - break; - - case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_tessellator_output_primitive(compiler, ins->declaration.tessellator_output_primitive); -+ shader_print_tessellator_output_primitive(compiler, " ", ins->declaration.tessellator_output_primitive, ""); - break; - - case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_tessellator_partitioning(compiler, ins->declaration.tessellator_partitioning); -+ shader_print_tessellator_partitioning(compiler, " ", ins->declaration.tessellator_partitioning, ""); - break; - - case VKD3DSIH_DCL_TGSM_RAW: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.tgsm_raw.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.tgsm_raw.reg, true, ""); - shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_raw.byte_count, ""); - break; - - case VKD3DSIH_DCL_TGSM_STRUCTURED: -- vkd3d_string_buffer_printf(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.tgsm_structured.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.tgsm_structured.reg, true, ""); - shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.byte_stride, ""); - shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.structure_count, ""); - break; -@@ -1951,15 +2093,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - - case VKD3DSIH_DCL_UAV_RAW: - shader_dump_uav_flags(compiler, ins->flags); -- shader_addline(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.raw_resource.resource.reg, true, ""); - shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); - break; - - case VKD3DSIH_DCL_UAV_STRUCTURED: - shader_dump_uav_flags(compiler, ins->flags); -- shader_addline(buffer, " "); -- shader_dump_dst_param(compiler, &ins->declaration.structured_resource.resource.reg, true); -+ shader_print_dst_param(compiler, " ", &ins->declaration.structured_resource.resource.reg, true, ""); - shader_print_uint_literal(compiler, ", ", 
ins->declaration.structured_resource.byte_stride, ""); - shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); - break; -@@ -1994,7 +2134,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - - if (ins->resource_type != VKD3D_SHADER_RESOURCE_NONE) - { -- shader_addline(buffer, "_indexable("); -+ vkd3d_string_buffer_printf(buffer, "_indexable("); - if (ins->raw) - vkd3d_string_buffer_printf(buffer, "raw_"); - if (ins->structured) -@@ -2002,7 +2142,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - shader_dump_resource_type(compiler, ins->resource_type); - if (ins->resource_stride) - shader_print_uint_literal(compiler, ", stride=", ins->resource_stride, ""); -- shader_addline(buffer, ")"); -+ vkd3d_string_buffer_printf(buffer, ")"); - } - - if (vkd3d_shader_instruction_has_texel_offset(ins)) -@@ -2021,37 +2161,200 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - for (i = 0; i < ins->dst_count; ++i) - { - shader_dump_ins_modifiers(compiler, &ins->dst[i]); -- shader_addline(buffer, !i ? " " : ", "); -- shader_dump_dst_param(compiler, &ins->dst[i], false); -+ shader_print_dst_param(compiler, !i ? " " : ", ", &ins->dst[i], false, ""); - } - - /* Other source tokens */ - for (i = ins->dst_count; i < (ins->dst_count + ins->src_count); ++i) - { -- shader_addline(buffer, !i ? " " : ", "); -- shader_dump_src_param(compiler, &ins->src[i - ins->dst_count]); -+ shader_print_src_param(compiler, !i ? " " : ", ", &ins->src[i - ins->dst_count], ""); - } - break; - } - -- shader_addline(buffer, "\n"); -+ vkd3d_string_buffer_printf(buffer, "\n"); -+} -+ -+static const char *get_sysval_semantic_name(enum vkd3d_shader_sysval_semantic semantic) -+{ -+ switch (semantic) -+ { -+ case VKD3D_SHADER_SV_NONE: return "NONE"; -+ case VKD3D_SHADER_SV_POSITION: return "POS"; -+ case VKD3D_SHADER_SV_CLIP_DISTANCE: return "CLIPDST"; -+ case VKD3D_SHADER_SV_CULL_DISTANCE: return "CULLDST"; -+ case VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX: return "RTINDEX"; -+ case VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX: return "VPINDEX"; -+ case VKD3D_SHADER_SV_VERTEX_ID: return "VERTID"; -+ case VKD3D_SHADER_SV_PRIMITIVE_ID: return "PRIMID"; -+ case VKD3D_SHADER_SV_INSTANCE_ID: return "INSTID"; -+ case VKD3D_SHADER_SV_IS_FRONT_FACE: return "FFACE"; -+ case VKD3D_SHADER_SV_SAMPLE_INDEX: return "SAMPLE"; -+ case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: return "QUADEDGE"; -+ case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: return "QUADINT"; -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: return "TRIEDGE"; -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: return "TRIINT"; -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: return "LINEDET"; -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: return "LINEDEN"; -+ case VKD3D_SHADER_SV_TARGET: return "TARGET"; -+ case VKD3D_SHADER_SV_DEPTH: return "DEPTH"; -+ case VKD3D_SHADER_SV_COVERAGE: return "COVERAGE"; -+ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "DEPTHGE"; -+ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "DEPTHLE"; -+ case VKD3D_SHADER_SV_STENCIL_REF: return "STENCILREF"; -+ default: return "??"; -+ } -+} -+ -+static const char *get_component_type_name(enum vkd3d_shader_component_type type) -+{ -+ switch (type) -+ { -+ case VKD3D_SHADER_COMPONENT_VOID: return "void"; -+ case VKD3D_SHADER_COMPONENT_UINT: return "uint"; -+ case VKD3D_SHADER_COMPONENT_INT: return "int"; -+ case VKD3D_SHADER_COMPONENT_FLOAT: return "float"; -+ case VKD3D_SHADER_COMPONENT_BOOL: return "bool"; -+ case 
VKD3D_SHADER_COMPONENT_DOUBLE: return "double"; -+ case VKD3D_SHADER_COMPONENT_UINT64: return "uint64"; -+ default: return "??"; -+ } -+} -+ -+static const char *get_minimum_precision_name(enum vkd3d_shader_minimum_precision prec) -+{ -+ switch (prec) -+ { -+ case VKD3D_SHADER_MINIMUM_PRECISION_NONE: return "NONE"; -+ case VKD3D_SHADER_MINIMUM_PRECISION_FLOAT_16: return "FLOAT_16"; -+ case VKD3D_SHADER_MINIMUM_PRECISION_FIXED_8_2: return "FIXED_8_2"; -+ case VKD3D_SHADER_MINIMUM_PRECISION_INT_16: return "INT_16"; -+ case VKD3D_SHADER_MINIMUM_PRECISION_UINT_16: return "UINT_16"; -+ default: return "??"; -+ } -+} -+ -+static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic semantic) -+{ -+ switch (semantic) -+ { -+ case VKD3D_SHADER_SV_DEPTH: return "oDepth"; -+ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "oDepthGE"; -+ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "oDepthLE"; -+ /* SV_Coverage has name vCoverage when used as an input, -+ * but it doens't appear in the signature in that case. */ -+ case VKD3D_SHADER_SV_COVERAGE: return "oMask"; -+ case VKD3D_SHADER_SV_STENCIL_REF: return "oStencilRef"; -+ default: return "??"; -+ } - } - --enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, -+static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, -+ const char *name, const char *register_name, const struct shader_signature *signature) -+{ -+ struct vkd3d_string_buffer *buffer = &compiler->buffer; -+ unsigned int i; -+ -+ if (signature->element_count == 0) -+ return VKD3D_OK; -+ -+ vkd3d_string_buffer_printf(buffer, "%s%s%s\n", -+ compiler->colours.opcode, name, compiler->colours.reset); -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ struct signature_element *element = &signature->elements[i]; -+ -+ vkd3d_string_buffer_printf(buffer, "%s.param%s %s", compiler->colours.opcode, -+ compiler->colours.reset, element->semantic_name); -+ -+ if (element->semantic_index != 0) -+ vkd3d_string_buffer_printf(buffer, "%u", element->semantic_index); -+ -+ if (element->register_index != -1) -+ { -+ shader_print_write_mask(compiler, "", element->mask, ""); -+ vkd3d_string_buffer_printf(buffer, ", %s%s%d%s", compiler->colours.reg, -+ register_name, element->register_index, compiler->colours.reset); -+ shader_print_write_mask(compiler, "", element->used_mask, ""); -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(buffer, ", %s%s%s", compiler->colours.reg, -+ get_semantic_register_name(element->sysval_semantic), compiler->colours.reset); -+ } -+ -+ if (!element->component_type && !element->sysval_semantic -+ && !element->min_precision && !element->stream_index) -+ goto done; -+ -+ vkd3d_string_buffer_printf(buffer, ", %s", -+ get_component_type_name(element->component_type)); -+ -+ if (!element->sysval_semantic && !element->min_precision && !element->stream_index) -+ goto done; -+ -+ vkd3d_string_buffer_printf(buffer, ", %s", -+ get_sysval_semantic_name(element->sysval_semantic)); -+ -+ if (!element->min_precision && !element->stream_index) -+ goto done; -+ -+ vkd3d_string_buffer_printf(buffer, ", %s", -+ get_minimum_precision_name(element->min_precision)); -+ -+ if (!element->stream_index) -+ goto done; -+ -+ vkd3d_string_buffer_printf(buffer, ", m%u", -+ element->stream_index); -+ -+ done: -+ vkd3d_string_buffer_printf(buffer, "\n"); -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler, -+ const struct vsir_program *program) -+{ -+ enum vkd3d_result 
ret; -+ -+ if ((ret = dump_signature(compiler, ".input", -+ program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v", -+ &program->input_signature)) < 0) -+ return ret; -+ -+ if ((ret = dump_signature(compiler, ".output", "o", -+ &program->output_signature)) < 0) -+ return ret; -+ -+ if ((ret = dump_signature(compiler, ".patch_constant", -+ program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o", -+ &program->patch_constant_signature)) < 0) -+ return ret; -+ -+ vkd3d_string_buffer_printf(&compiler->buffer, "%s.text%s\n", -+ compiler->colours.opcode, compiler->colours.reset); -+ -+ return VKD3D_OK; -+} -+ -+enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, enum vsir_asm_dialect dialect) -+ struct vkd3d_shader_code *out, enum vsir_asm_flags flags) - { - const struct vkd3d_shader_version *shader_version = &program->shader_version; - enum vkd3d_shader_compile_option_formatting_flags formatting; - struct vkd3d_d3d_asm_compiler compiler = - { -- .dialect = dialect, -+ .flags = flags, - }; - enum vkd3d_result result = VKD3D_OK; - struct vkd3d_string_buffer *buffer; - unsigned int indent, i, j; - const char *indent_str; -- void *code; - - static const struct vkd3d_d3d_asm_colours no_colours = - { -@@ -2064,6 +2367,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - .swizzle = "", - .version = "", - .write_mask = "", -+ .label = "", - }; - static const struct vkd3d_d3d_asm_colours colours = - { -@@ -2076,6 +2380,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - .swizzle = "\x1b[93m", - .version = "\x1b[36m", - .write_mask = "\x1b[93m", -+ .label = "\x1b[91m", - }; - - formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT -@@ -2109,6 +2414,17 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - shader_get_type_prefix(shader_version->type), shader_version->major, - shader_version->minor, compiler.colours.reset); - -+ /* The signatures we emit only make sense for DXBC shaders. D3DBC -+ * doesn't even have an explicit concept of signature. 
*/ -+ if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4) -+ { -+ if ((result = dump_signatures(&compiler, program)) < 0) -+ { -+ vkd3d_string_buffer_cleanup(buffer); -+ return result; -+ } -+ } -+ - indent = 0; - for (i = 0; i < program->instructions.count; ++i) - { -@@ -2124,6 +2440,14 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - --indent; - break; - -+ case VKD3DSIH_LABEL: -+ case VKD3DSIH_HS_DECLS: -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ indent = 0; -+ break; -+ - default: - break; - } -@@ -2142,6 +2466,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - case VKD3DSIH_IFC: - case VKD3DSIH_LOOP: - case VKD3DSIH_SWITCH: -+ case VKD3DSIH_LABEL: - ++indent; - break; - -@@ -2150,18 +2475,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, - } - } - -- if ((code = vkd3d_malloc(buffer->content_size))) -- { -- memcpy(code, buffer->buffer, buffer->content_size); -- out->size = buffer->content_size; -- out->code = code; -- } -- else -- { -- result = VKD3D_ERROR_OUT_OF_MEMORY; -- } -- -- vkd3d_string_buffer_cleanup(buffer); -+ vkd3d_shader_code_from_string_buffer(out, buffer); - - return result; - } -@@ -2171,7 +2485,7 @@ void vkd3d_shader_trace(const struct vsir_program *program) - const char *p, *q, *end; - struct vkd3d_shader_code code; - -- if (vkd3d_dxbc_binary_to_text(program, NULL, &code, VSIR_ASM_VSIR) != VKD3D_OK) -+ if (d3d_asm_compile(program, NULL, &code, VSIR_ASM_FLAG_DUMP_TYPES) != VKD3D_OK) - return; - - end = (const char *)code.code + code.size; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 27f5c810436..cda73d48fc0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -215,8 +215,12 @@ struct vkd3d_shader_sm1_parser - - struct vkd3d_shader_parser p; - -+ struct -+ { - #define MAX_CONSTANT_COUNT 8192 -- uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32]; -+ uint32_t def_mask[VKD3D_BITMAP_SIZE(MAX_CONSTANT_COUNT)]; -+ uint32_t count; -+ } constants[3]; - }; - - /* This table is not order or position dependent. 
*/ -@@ -392,11 +396,6 @@ static const enum vkd3d_shader_resource_type resource_type_table[] = - /* VKD3D_SM1_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, - }; - --static struct vkd3d_shader_sm1_parser *vkd3d_shader_sm1_parser(struct vkd3d_shader_parser *parser) --{ -- return CONTAINING_RECORD(parser, struct vkd3d_shader_sm1_parser, p); --} -- - static uint32_t read_u32(const uint32_t **ptr) - { - return *(*ptr)++; -@@ -414,7 +413,7 @@ static bool has_relative_address(uint32_t param) - static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info( - const struct vkd3d_shader_sm1_parser *sm1, enum vkd3d_sm1_opcode opcode) - { -- const struct vkd3d_shader_version *version = &sm1->p.program.shader_version; -+ const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; - const struct vkd3d_sm1_opcode_info *info; - unsigned int i = 0; - -@@ -537,13 +536,14 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp - const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, - unsigned int register_index, bool is_dcl, unsigned int mask) - { -+ struct vsir_program *program = sm1->p.program; - struct shader_signature *signature; - struct signature_element *element; - - if (output) -- signature = &sm1->p.shader_desc.output_signature; -+ signature = &program->output_signature; - else -- signature = &sm1->p.shader_desc.input_signature; -+ signature = &program->input_signature; - - if ((element = find_signature_element(signature, name, index))) - { -@@ -568,7 +568,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp - element->register_count = 1; - element->mask = mask; - element->used_mask = is_dcl ? 0 : mask; -- if (sm1->p.program.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) - element->interpolation_mode = VKD3DSIM_LINEAR; - - return true; -@@ -577,13 +577,14 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp - static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, - unsigned int register_index, unsigned int mask) - { -+ struct vsir_program *program = sm1->p.program; - struct shader_signature *signature; - struct signature_element *element; - - if (output) -- signature = &sm1->p.shader_desc.output_signature; -+ signature = &program->output_signature; - else -- signature = &sm1->p.shader_desc.input_signature; -+ signature = &program->input_signature; - - if (!(element = find_signature_element_by_register_index(signature, register_index))) - { -@@ -598,7 +599,7 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, - static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, - const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) - { -- const struct vkd3d_shader_version *version = &sm1->p.program.shader_version; -+ const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; - unsigned int register_index = reg->idx[0].offset; - - switch (reg->type) -@@ -701,7 +702,7 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * - static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, - const struct vkd3d_shader_semantic *semantic) - { -- const struct vkd3d_shader_version *version = &sm1->p.program.shader_version; -+ const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; - const 
struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; - enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; - unsigned int mask = semantic->resource.reg.write_mask; -@@ -750,22 +751,20 @@ static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser * - static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, - enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) - { -- struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; -- -- desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); -+ sm1->constants[set].count = max(sm1->constants[set].count, index + 1); - if (from_def) - { - /* d3d shaders have a maximum of 8192 constants; we should not overrun - * this array. */ -- assert((index / 32) <= ARRAY_SIZE(sm1->constant_def_mask[set])); -- bitmap_set(sm1->constant_def_mask[set], index); -+ assert((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); -+ bitmap_set(sm1->constants[set].def_mask, index); - } - } - - static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, - const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) - { -- struct vsir_program *program = &sm1->p.program; -+ struct vsir_program *program = sm1->p.program; - uint32_t register_index = reg->idx[0].offset; - - switch (reg->type) -@@ -826,7 +825,7 @@ static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, - * VS >= 2.0 have relative addressing (with token) - * VS >= 1.0 < 2.0 have relative addressing (without token) - * The version check below should work in general. */ -- if (sm1->p.program.shader_version.major < 2) -+ if (sm1->p.program->shader_version.major < 2) - { - *addr_token = (1u << 31) - | ((VKD3DSPR_ADDR << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2) -@@ -855,7 +854,7 @@ static void shader_sm1_skip_opcode(const struct vkd3d_shader_sm1_parser *sm1, co - /* Version 2.0+ shaders may contain address tokens, but fortunately they - * have a useful length mask - use it here. Version 1.x shaders contain no - * such tokens. 
*/ -- if (sm1->p.program.shader_version.major >= 2) -+ if (sm1->p.program->shader_version.major >= 2) - { - length = (opcode_token & VKD3D_SM1_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; - *ptr += length; -@@ -881,15 +880,6 @@ static void shader_sm1_skip_opcode(const struct vkd3d_shader_sm1_parser *sm1, co - *ptr += (opcode_info->dst_count + opcode_info->src_count); - } - --static void shader_sm1_destroy(struct vkd3d_shader_parser *parser) --{ -- struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); -- -- vsir_program_cleanup(&parser->program); -- free_shader_desc(&sm1->p.shader_desc); -- vkd3d_free(sm1); --} -- - static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, - struct vkd3d_shader_src_param *src_param) - { -@@ -899,7 +889,7 @@ static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const - shader_sm1_read_param(sm1, ptr, &token, &addr_token); - if (has_relative_address(token)) - { -- if (!(src_rel_addr = vsir_program_get_src_params(&sm1->p.program, 1))) -+ if (!(src_rel_addr = vsir_program_get_src_params(sm1->p.program, 1))) - { - vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, - "Out of memory."); -@@ -920,7 +910,7 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const - shader_sm1_read_param(sm1, ptr, &token, &addr_token); - if (has_relative_address(token)) - { -- if (!(dst_rel_addr = vsir_program_get_src_params(&sm1->p.program, 1))) -+ if (!(dst_rel_addr = vsir_program_get_src_params(sm1->p.program, 1))) - { - vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, - "Out of memory."); -@@ -1089,7 +1079,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - { - struct vkd3d_shader_src_param *src_params, *predicate; - const struct vkd3d_sm1_opcode_info *opcode_info; -- struct vsir_program *program = &sm1->p.program; -+ struct vsir_program *program = sm1->p.program; - struct vkd3d_shader_dst_param *dst_param; - const uint32_t **ptr = &sm1->ptr; - uint32_t opcode_token; -@@ -1226,18 +1216,12 @@ static bool shader_sm1_is_end(struct vkd3d_shader_sm1_parser *sm1) - return false; - } - --const struct vkd3d_shader_parser_ops shader_sm1_parser_ops = --{ -- .parser_destroy = shader_sm1_destroy, --}; -- --static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, -+static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, struct vsir_program *program, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) - { - const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; - const uint32_t *code = compile_info->source.code; - size_t code_size = compile_info->source.size; -- struct vkd3d_shader_desc *shader_desc; - struct vkd3d_shader_version version; - uint16_t shader_type; - size_t token_count; -@@ -1287,12 +1271,10 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, - sm1->end = &code[token_count]; - - /* Estimate instruction count to avoid reallocation in most shaders. */ -- if (!vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name, &version, &shader_sm1_parser_ops, -- code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) -+ if (!vsir_program_init(program, &version, code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- shader_desc = &sm1->p.shader_desc; -- shader_desc->byte_code = code; -- shader_desc->byte_code_size = code_size; -+ -+ vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); - sm1->ptr = sm1->start; - - return VKD3D_OK; -@@ -1306,77 +1288,68 @@ static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, - /* Find the highest constant index which is not written by a DEF - * instruction. We can't (easily) use an FFZ function for this since it - * needs to be limited by the highest used register index. */ -- for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) -+ for (j = sm1->constants[set].count; j > 0; --j) - { -- if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) -+ if (!bitmap_is_set(sm1->constants[set].def_mask, j - 1)) - return j; - } - - return 0; - } - --int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) -+int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, -+ struct vkd3d_shader_message_context *message_context, struct vsir_program *program) - { - struct vkd3d_shader_instruction_array *instructions; -+ struct vkd3d_shader_sm1_parser sm1 = {0}; - struct vkd3d_shader_instruction *ins; -- struct vkd3d_shader_sm1_parser *sm1; - unsigned int i; - int ret; - -- if (!(sm1 = vkd3d_calloc(1, sizeof(*sm1)))) -- { -- ERR("Failed to allocate parser.\n"); -- return VKD3D_ERROR_OUT_OF_MEMORY; -- } -- -- if ((ret = shader_sm1_init(sm1, compile_info, message_context)) < 0) -+ if ((ret = shader_sm1_init(&sm1, program, compile_info, message_context)) < 0) - { - WARN("Failed to initialise shader parser, ret %d.\n", ret); -- vkd3d_free(sm1); - return ret; - } - -- instructions = &sm1->p.program.instructions; -- while (!shader_sm1_is_end(sm1)) -+ instructions = &program->instructions; -+ while (!shader_sm1_is_end(&sm1)) - { - if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) - { - ERR("Failed to allocate instructions.\n"); -- vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); -- shader_sm1_destroy(&sm1->p); -+ vkd3d_shader_parser_error(&sm1.p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); -+ vsir_program_cleanup(program); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ins = &instructions->elements[instructions->count]; -- shader_sm1_read_instruction(sm1, ins); -+ shader_sm1_read_instruction(&sm1, ins); - - if (ins->handler_idx == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); -- shader_sm1_destroy(&sm1->p); -+ vsir_program_cleanup(program); - return VKD3D_ERROR_INVALID_SHADER; - } - ++instructions->count; - } - -- for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) -- sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); -+ for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) -+ program->flat_constant_count[i] = get_external_constant_count(&sm1, i); - -- if (!sm1->p.failed) -- ret = vsir_validate(&sm1->p); -+ if (!sm1.p.failed) -+ ret = vkd3d_shader_parser_validate(&sm1.p, config_flags); - -- if (sm1->p.failed && ret >= 0) -+ if (sm1.p.failed && ret >= 0) - ret = VKD3D_ERROR_INVALID_SHADER; - - if (ret < 0) - { - WARN("Failed to parse shader.\n"); -- shader_sm1_destroy(&sm1->p); -+ vsir_program_cleanup(program); 
- return ret; - } - -- *parser = &sm1->p; -- - return ret; - } - -@@ -1499,47 +1472,74 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns - return D3DPS_VERSION(major, minor); - } - --static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) -+D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - { - switch (type->class) - { - case HLSL_CLASS_ARRAY: -- return sm1_class(type->e.array.type); -+ return hlsl_sm1_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3DXPC_MATRIX_COLUMNS; - else - return D3DXPC_MATRIX_ROWS; -- case HLSL_CLASS_OBJECT: -- return D3DXPC_OBJECT; - case HLSL_CLASS_SCALAR: - return D3DXPC_SCALAR; - case HLSL_CLASS_STRUCT: - return D3DXPC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3DXPC_VECTOR; -- default: -- ERR("Invalid class %#x.\n", type->class); -- vkd3d_unreachable(); -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_VERTEX_SHADER: -+ return D3DXPC_OBJECT; -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VOID: -+ break; - } -+ -+ vkd3d_unreachable(); - } - --static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) -+D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - { -- switch (type->base_type) -+ switch (type->class) - { -- case HLSL_TYPE_BOOL: -- return D3DXPT_BOOL; -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- return D3DXPT_FLOAT; -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- return D3DXPT_INT; -- case HLSL_TYPE_PIXELSHADER: -- return D3DXPT_PIXELSHADER; -- case HLSL_TYPE_SAMPLER: -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ switch (type->e.numeric.type) -+ { -+ case HLSL_TYPE_BOOL: -+ return D3DXPT_BOOL; -+ /* Actually double behaves differently depending on DLL version: -+ * For <= 36, it maps to D3DXPT_FLOAT. -+ * For 37-40, it maps to zero (D3DXPT_VOID). -+ * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* -+ * values are mostly compatible with D3DXPT_*). -+ * However, the latter two cases look like bugs, and a reasonable -+ * application certainly wouldn't know what to do with them. -+ * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. 
*/ -+ case HLSL_TYPE_DOUBLE: -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ return D3DXPT_FLOAT; -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ return D3DXPT_INT; -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ case HLSL_CLASS_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: -@@ -1557,9 +1557,8 @@ static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) - vkd3d_unreachable(); - } - break; -- case HLSL_TYPE_STRING: -- return D3DXPT_STRING; -- case HLSL_TYPE_TEXTURE: -+ -+ case HLSL_CLASS_TEXTURE: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: -@@ -1577,13 +1576,33 @@ static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) - vkd3d_unreachable(); - } - break; -- case HLSL_TYPE_VERTEXSHADER: -- return D3DXPT_VERTEXSHADER; -- case HLSL_TYPE_VOID: -+ -+ case HLSL_CLASS_ARRAY: -+ return hlsl_sm1_base_type(type->e.array.type); -+ -+ case HLSL_CLASS_STRUCT: - return D3DXPT_VOID; -- default: -- vkd3d_unreachable(); -+ -+ case HLSL_CLASS_STRING: -+ return D3DXPT_STRING; -+ -+ case HLSL_CLASS_PIXEL_SHADER: -+ return D3DXPT_PIXELSHADER; -+ -+ case HLSL_CLASS_VERTEX_SHADER: -+ return D3DXPT_VERTEXSHADER; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VOID: -+ break; - } -+ -+ vkd3d_unreachable(); - } - - static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) -@@ -1620,7 +1639,7 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_typ - } - } - -- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); -+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); -@@ -1670,7 +1689,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { -- if (var->semantic.name || !var->regs[r].allocated) -+ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - ++uniform_count; -@@ -1708,14 +1727,14 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { -- if (var->semantic.name || !var->regs[r].allocated) -+ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - put_u32(buffer, 0); /* name */ - if (r == HLSL_REGSET_NUMERIC) - { - put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); -- put_u32(buffer, var->data_type->reg_size[r] / 4); -+ put_u32(buffer, var->bind_count[r]); - } - else - { -@@ -1737,7 +1756,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - { - size_t var_offset, name_offset; - -- if (var->semantic.name || !var->regs[r].allocated) -+ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); -@@ -1969,24 +1988,21 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - /* Narrowing casts were already lowered. 
*/ - assert(src_type->dimx == dst_type->dimx); - -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- /* Integers are internally represented as floats, so no change is necessary.*/ -+ case HLSL_TYPE_BOOL: -+ /* Integrals are internally represented as floats, so no change is necessary.*/ - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - -- case HLSL_TYPE_BOOL: -- hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to float."); -- break; -- - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float."); - break; -@@ -1998,11 +2014,14 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- switch(src_type->base_type) -+ switch(src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- /* A compilation pass applies a FLOOR operation to casts to int, so no change is necessary. */ -+ /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not -+ * reach this case unless we are missing something. */ -+ hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer."); -+ break; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -@@ -2067,6 +2086,9 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - D3DDECLUSAGE usage; - bool ret; - -+ if ((!output && !var->last_read) || (output && !var->first_write)) -+ return; -+ - if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) - { - usage = 0; -@@ -2242,13 +2264,19 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - - assert(instr->reg.allocated); - -+ if (expr->op == HLSL_OP1_REINTERPRET) -+ { -+ write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ return; -+ } -+ - if (expr->op == HLSL_OP1_CAST) - { - write_sm1_cast(ctx, buffer, instr); - return; - } - -- if (instr->data_type->base_type != HLSL_TYPE_FLOAT) -+ if (instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) - { - /* These need to be lowered. 
*/ - hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); -@@ -2329,7 +2357,23 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - break; - -+ case HLSL_OP2_LOGIC_AND: -+ write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); -+ break; -+ -+ case HLSL_OP2_LOGIC_OR: -+ write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -+ break; -+ -+ case HLSL_OP2_SLT: -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); -+ write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); -+ break; -+ - case HLSL_OP3_CMP: -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) -+ hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); - write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - -@@ -2488,7 +2532,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - - if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) - { -- FIXME("Matrix writemasks need to be lowered.\n"); -+ hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks.\n"); - return; - } - -@@ -2552,19 +2596,11 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - { - if (instr->data_type) - { -- if (instr->data_type->class == HLSL_CLASS_MATRIX) -+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) - { -- /* These need to be lowered. */ -- hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); -- continue; -- } -- else if (instr->data_type->class == HLSL_CLASS_OBJECT) -- { -- hlsl_fixme(ctx, &instr->loc, "Object copy."); -+ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); - break; - } -- -- assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); - } - - switch (instr->type) -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index 37ebc73c099..8a1012d909b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -150,7 +150,7 @@ static const char *shader_get_string(const char *data, size_t data_size, size_t - } - - static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, -- const char *source_name, struct vkd3d_shader_dxbc_desc *desc) -+ const char *source_name, uint32_t flags, struct vkd3d_shader_dxbc_desc *desc) - { - const struct vkd3d_shader_location location = {.source_name = source_name}; - struct vkd3d_shader_dxbc_section_desc *sections, *section; -@@ -186,17 +186,20 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ - checksum[1] = read_u32(&ptr); - checksum[2] = read_u32(&ptr); - checksum[3] = read_u32(&ptr); -- vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); -- if (memcmp(checksum, calculated_checksum, sizeof(checksum))) -- { -- WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " -- "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", -- checksum[0], checksum[1], checksum[2], checksum[3], -- calculated_checksum[0], calculated_checksum[1], -- calculated_checksum[2], calculated_checksum[3]); -- vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, -- "Invalid DXBC checksum."); -- return VKD3D_ERROR_INVALID_ARGUMENT; -+ if 
(!(flags & VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM)) -+ { -+ vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); -+ if (memcmp(checksum, calculated_checksum, sizeof(checksum))) -+ { -+ WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " -+ "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", -+ checksum[0], checksum[1], checksum[2], checksum[3], -+ calculated_checksum[0], calculated_checksum[1], -+ calculated_checksum[2], calculated_checksum[3]); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, -+ "Invalid DXBC checksum."); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } - } - - version = read_u32(&ptr); -@@ -287,7 +290,7 @@ static int for_each_dxbc_section(const struct vkd3d_shader_code *dxbc, - unsigned int i; - int ret; - -- if ((ret = parse_dxbc(dxbc, message_context, source_name, &desc)) < 0) -+ if ((ret = parse_dxbc(dxbc, message_context, source_name, 0, &desc)) < 0) - return ret; - - for (i = 0; i < desc.section_count; ++i) -@@ -313,7 +316,7 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, - *messages = NULL; - vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); - -- ret = parse_dxbc(dxbc, &message_context, NULL, desc); -+ ret = parse_dxbc(dxbc, &message_context, NULL, flags, desc); - - vkd3d_shader_message_context_trace_messages(&message_context); - if (!vkd3d_shader_message_context_copy_messages(&message_context, messages) && ret >= 0) -@@ -485,7 +488,7 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, - struct vkd3d_shader_message_context *message_context, void *context) - { -- struct vkd3d_shader_desc *desc = context; -+ struct dxbc_shader_desc *desc = context; - int ret; - - switch (section->tag) -@@ -550,7 +553,7 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, - return VKD3D_OK; - } - --void free_shader_desc(struct vkd3d_shader_desc *desc) -+void free_dxbc_shader_desc(struct dxbc_shader_desc *desc) - { - shader_signature_cleanup(&desc->input_signature); - shader_signature_cleanup(&desc->output_signature); -@@ -558,7 +561,7 @@ void free_shader_desc(struct vkd3d_shader_desc *desc) - } - - int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, -- struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) -+ struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc) - { - int ret; - -@@ -569,7 +572,7 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - if (ret < 0) - { - WARN("Failed to parse shader, vkd3d result %d.\n", ret); -- free_shader_desc(desc); -+ free_dxbc_shader_desc(desc); - } - - return ret; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 26a8a5c1cc3..4943a586680 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -31,12 +31,16 @@ static const uint64_t GLOBALVAR_FLAG_EXPLICIT_TYPE = 2; - static const unsigned int GLOBALVAR_ADDRESS_SPACE_SHIFT = 2; - static const uint64_t ALLOCA_FLAG_IN_ALLOCA = 0x20; - static const uint64_t ALLOCA_FLAG_EXPLICIT_TYPE = 0x40; --static const uint64_t ALLOCA_ALIGNMENT_MASK = ALLOCA_FLAG_IN_ALLOCA - 1; -+static const uint64_t ALLOCA_ALIGNMENT_MASK = 0x1f; - static const unsigned int SHADER_DESCRIPTOR_TYPE_COUNT = 4; - static const size_t 
MAX_IR_INSTRUCTIONS_PER_DXIL_INSTRUCTION = 11; - - static const unsigned int dx_max_thread_group_size[3] = {1024, 1024, 64}; - -+static const unsigned int MAX_GS_INSTANCE_COUNT = 32; /* kMaxGSInstanceCount */ -+static const unsigned int MAX_GS_OUTPUT_TOTAL_SCALARS = 1024; /* kMaxGSOutputTotalScalars */ -+static const unsigned int MAX_GS_OUTPUT_STREAMS = 4; -+ - #define VKD3D_SHADER_SWIZZLE_64_MASK \ - (VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(0) \ - | VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(1)) -@@ -103,6 +107,7 @@ enum bitcode_constant_code - CST_CODE_INTEGER = 4, - CST_CODE_FLOAT = 6, - CST_CODE_STRING = 8, -+ CST_CODE_CE_CAST = 11, - CST_CODE_CE_GEP = 12, - CST_CODE_CE_INBOUNDS_GEP = 20, - CST_CODE_DATA = 22, -@@ -282,6 +287,18 @@ enum dxil_element_additional_tag - ADDITIONAL_TAG_USED_MASK = 3, - }; - -+enum dxil_input_primitive -+{ -+ INPUT_PRIMITIVE_UNDEFINED = 0, -+ INPUT_PRIMITIVE_POINT = 1, -+ INPUT_PRIMITIVE_LINE = 2, -+ INPUT_PRIMITIVE_TRIANGLE = 3, -+ INPUT_PRIMITIVE_LINEWITHADJACENCY = 6, -+ INPUT_PRIMITIVE_TRIANGLEWITHADJACENY = 7, -+ INPUT_PRIMITIVE_PATCH1 = 8, -+ INPUT_PRIMITIVE_PATCH32 = 39, -+}; -+ - enum dxil_shader_properties_tag - { - SHADER_PROPERTIES_FLAGS = 0, -@@ -342,6 +359,8 @@ enum dx_intrinsic_opcode - { - DX_LOAD_INPUT = 4, - DX_STORE_OUTPUT = 5, -+ DX_FABS = 6, -+ DX_SATURATE = 7, - DX_ISNAN = 8, - DX_ISINF = 9, - DX_ISFINITE = 10, -@@ -374,8 +393,15 @@ enum dx_intrinsic_opcode - DX_IMIN = 38, - DX_UMAX = 39, - DX_UMIN = 40, -+ DX_FMAD = 46, -+ DX_FMA = 47, -+ DX_IMAD = 48, -+ DX_UMAD = 49, - DX_IBFE = 51, - DX_UBFE = 52, -+ DX_DOT2 = 54, -+ DX_DOT3 = 55, -+ DX_DOT4 = 56, - DX_CREATE_HANDLE = 57, - DX_CBUFFER_LOAD_LEGACY = 59, - DX_SAMPLE = 60, -@@ -388,16 +414,54 @@ enum dx_intrinsic_opcode - DX_TEXTURE_STORE = 67, - DX_BUFFER_LOAD = 68, - DX_BUFFER_STORE = 69, -+ DX_BUFFER_UPDATE_COUNTER = 70, - DX_GET_DIMENSIONS = 72, -+ DX_TEXTURE_GATHER = 73, -+ DX_TEXTURE_GATHER_CMP = 74, -+ DX_TEX2DMS_GET_SAMPLE_POS = 75, -+ DX_RT_GET_SAMPLE_POS = 76, -+ DX_RT_GET_SAMPLE_COUNT = 77, - DX_ATOMIC_BINOP = 78, - DX_ATOMIC_CMP_XCHG = 79, -+ DX_BARRIER = 80, -+ DX_CALCULATE_LOD = 81, -+ DX_DISCARD = 82, - DX_DERIV_COARSEX = 83, - DX_DERIV_COARSEY = 84, - DX_DERIV_FINEX = 85, - DX_DERIV_FINEY = 86, -+ DX_COVERAGE = 91, -+ DX_THREAD_ID = 93, -+ DX_GROUP_ID = 94, -+ DX_THREAD_ID_IN_GROUP = 95, -+ DX_FLATTENED_THREAD_ID_IN_GROUP = 96, -+ DX_EMIT_STREAM = 97, -+ DX_CUT_STREAM = 98, -+ DX_EMIT_THEN_CUT_STREAM = 99, -+ DX_MAKE_DOUBLE = 101, - DX_SPLIT_DOUBLE = 102, -+ DX_LOAD_OUTPUT_CONTROL_POINT = 103, -+ DX_LOAD_PATCH_CONSTANT = 104, -+ DX_DOMAIN_LOCATION = 105, -+ DX_STORE_PATCH_CONSTANT = 106, -+ DX_OUTPUT_CONTROL_POINT_ID = 107, -+ DX_PRIMITIVE_ID = 108, -+ DX_WAVE_IS_FIRST_LANE = 110, -+ DX_WAVE_GET_LANE_INDEX = 111, -+ DX_WAVE_GET_LANE_COUNT = 112, -+ DX_WAVE_ANY_TRUE = 113, -+ DX_WAVE_ALL_TRUE = 114, -+ DX_WAVE_ACTIVE_ALL_EQUAL = 115, -+ DX_WAVE_ACTIVE_BALLOT = 116, -+ DX_WAVE_READ_LANE_AT = 117, -+ DX_WAVE_READ_LANE_FIRST = 118, -+ DX_WAVE_ACTIVE_OP = 119, -+ DX_WAVE_ACTIVE_BIT = 120, -+ DX_WAVE_PREFIX_OP = 121, - DX_LEGACY_F32TOF16 = 130, - DX_LEGACY_F16TOF32 = 131, -+ DX_WAVE_ALL_BIT_COUNT = 135, -+ DX_WAVE_PREFIX_BIT_COUNT = 136, - DX_RAW_BUFFER_LOAD = 139, - DX_RAW_BUFFER_STORE = 140, - }; -@@ -449,6 +513,32 @@ enum dxil_predicate - ICMP_SLE = 41, - }; - -+enum dxil_rmw_code -+{ -+ RMW_XCHG = 0, -+ RMW_ADD = 1, -+ RMW_SUB = 2, -+ RMW_AND = 3, -+ RMW_NAND = 4, -+ RMW_OR = 5, -+ RMW_XOR = 6, -+ RMW_MAX = 7, -+ RMW_MIN = 8, -+ RMW_UMAX = 9, 
-+ RMW_UMIN = 10, -+}; -+ -+enum dxil_atomic_ordering -+{ -+ ORDERING_NOTATOMIC = 0, -+ ORDERING_UNORDERED = 1, -+ ORDERING_MONOTONIC = 2, -+ ORDERING_ACQUIRE = 3, -+ ORDERING_RELEASE = 4, -+ ORDERING_ACQREL = 5, -+ ORDERING_SEQCST = 6, -+}; -+ - enum dxil_atomic_binop_code - { - ATOMIC_BINOP_ADD, -@@ -463,6 +553,29 @@ enum dxil_atomic_binop_code - ATOMIC_BINOP_INVALID, - }; - -+enum dxil_sync_flags -+{ -+ SYNC_THREAD_GROUP = 0x1, -+ SYNC_GLOBAL_UAV = 0x2, -+ SYNC_THREAD_GROUP_UAV = 0x4, -+ SYNC_GROUP_SHARED_MEMORY = 0x8, -+}; -+ -+enum dxil_wave_bit_op_kind -+{ -+ WAVE_BIT_OP_AND = 0, -+ WAVE_BIT_OP_OR = 1, -+ WAVE_BIT_OP_XOR = 2, -+}; -+ -+enum dxil_wave_op_kind -+{ -+ WAVE_OP_ADD = 0, -+ WAVE_OP_MUL = 1, -+ WAVE_OP_MIN = 2, -+ WAVE_OP_MAX = 3, -+}; -+ - struct sm6_pointer_info - { - const struct sm6_type *type; -@@ -541,7 +654,9 @@ struct sm6_value - { - const struct sm6_type *type; - enum sm6_value_type value_type; -+ unsigned int structure_stride; - bool is_undefined; -+ bool is_back_ref; - union - { - struct sm6_function_data function; -@@ -736,9 +851,12 @@ struct sm6_parser - size_t global_symbol_count; - - const char *entry_point; -+ const char *patch_constant_function; - - struct vkd3d_shader_dst_param *output_params; - struct vkd3d_shader_dst_param *input_params; -+ struct vkd3d_shader_dst_param *patch_constant_params; -+ uint32_t io_regs_declared[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; - - struct sm6_function *functions; - size_t function_count; -@@ -753,6 +871,7 @@ struct sm6_parser - - unsigned int indexable_temp_count; - unsigned int icb_count; -+ unsigned int tgsm_count; - - struct sm6_value *values; - size_t value_count; -@@ -790,11 +909,6 @@ static size_t size_add_with_overflow_check(size_t a, size_t b) - return (i < a) ? SIZE_MAX : i; - } - --static struct sm6_parser *sm6_parser(struct vkd3d_shader_parser *parser) --{ -- return CONTAINING_RECORD(parser, struct sm6_parser, p); --} -- - static bool sm6_parser_is_end(struct sm6_parser *sm6) - { - return sm6->ptr == sm6->end; -@@ -1876,6 +1990,25 @@ static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type - return NULL; - } - -+static const struct sm6_type *sm6_type_get_cmpxchg_result_struct(struct sm6_parser *sm6) -+{ -+ const struct sm6_type *type; -+ unsigned int i; -+ -+ for (i = 0; i < sm6->type_count; ++i) -+ { -+ type = &sm6->types[i]; -+ if (sm6_type_is_struct(type) && type->u.struc->elem_count == 2 -+ && sm6_type_is_i32(type->u.struc->elem_types[0]) -+ && sm6_type_is_bool(type->u.struc->elem_types[1])) -+ { -+ return type; -+ } -+ } -+ -+ return NULL; -+} -+ - /* Call for aggregate types only. 
*/ - static const struct sm6_type *sm6_type_get_element_type_at_index(const struct sm6_type *type, uint64_t elem_idx) - { -@@ -2110,6 +2243,15 @@ static inline bool sm6_value_is_undef(const struct sm6_value *value) - return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; - } - -+static bool sm6_value_vector_is_constant_or_undef(const struct sm6_value **values, unsigned int count) -+{ -+ unsigned int i; -+ for (i = 0; i < count; ++i) -+ if (!sm6_value_is_constant(values[i]) && !sm6_value_is_undef(values[i])) -+ return false; -+ return true; -+} -+ - static bool sm6_value_is_icb(const struct sm6_value *value) - { - return value->value_type == VALUE_TYPE_ICB; -@@ -2120,6 +2262,11 @@ static bool sm6_value_is_ssa(const struct sm6_value *value) - return sm6_value_is_register(value) && register_is_ssa(&value->u.reg); - } - -+static bool sm6_value_is_numeric_array(const struct sm6_value *value) -+{ -+ return sm6_value_is_register(value) && register_is_numeric_array(&value->u.reg); -+} -+ - static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *value) - { - if (!sm6_value_is_constant(value)) -@@ -2153,7 +2300,7 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ - { - struct vkd3d_shader_src_param *params; - -- if (!(params = vsir_program_get_src_params(&sm6->p.program, count))) -+ if (!(params = vsir_program_get_src_params(sm6->p.program, count))) - { - ERR("Failed to allocate src params.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -@@ -2170,7 +2317,7 @@ static struct vkd3d_shader_dst_param *instruction_dst_params_alloc(struct vkd3d_ - { - struct vkd3d_shader_dst_param *params; - -- if (!(params = vsir_program_get_dst_params(&sm6->p.program, count))) -+ if (!(params = vsir_program_get_dst_params(sm6->p.program, count))) - { - ERR("Failed to allocate dst params.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -@@ -2199,6 +2346,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type - return VKD3D_DATA_BOOL; - case 8: - return VKD3D_DATA_UINT8; -+ case 16: -+ return VKD3D_DATA_UINT16; - case 32: - return VKD3D_DATA_UINT; - case 64: -@@ -2212,6 +2361,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type - { - switch (type->u.width) - { -+ case 16: -+ return VKD3D_DATA_HALF; - case 32: - return VKD3D_DATA_FLOAT; - case 64: -@@ -2252,6 +2403,12 @@ static void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const st - register_init_ssa_vector(reg, sm6_type_get_scalar_type(type, 0), 1, value, sm6); - } - -+static void register_make_constant_uint(struct vkd3d_shader_register *reg, unsigned int value) -+{ -+ vsir_register_init(reg, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); -+ reg->u.immconst_u32[0] = value; -+} -+ - static void dst_param_init(struct vkd3d_shader_dst_param *param) - { - param->write_mask = VKD3DSP_WRITEMASK_0; -@@ -2301,6 +2458,12 @@ static void src_param_init_scalar(struct vkd3d_shader_src_param *param, unsigned - param->modifiers = VKD3DSPSM_NONE; - } - -+static void src_param_init_vector(struct vkd3d_shader_src_param *param, unsigned int component_count) -+{ -+ param->swizzle = VKD3D_SHADER_NO_SWIZZLE & ((1ull << VKD3D_SHADER_SWIZZLE_SHIFT(component_count)) - 1); -+ param->modifiers = VKD3DSPSM_NONE; -+} -+ - static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src) - { - src_param_init(param); -@@ -2315,20 +2478,28 @@ static 
void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param, - param->reg = *reg; - } - -+static void src_param_make_constant_uint(struct vkd3d_shader_src_param *param, unsigned int value) -+{ -+ src_param_init(param); -+ register_make_constant_uint(¶m->reg, value); -+} -+ - static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address, - struct sm6_parser *sm6) - { - if (sm6_value_is_constant(address)) - { - idx->offset = sm6_value_get_constant_uint(address); -+ idx->rel_addr = NULL; - } - else if (sm6_value_is_undef(address)) - { - idx->offset = 0; -+ idx->rel_addr = NULL; - } - else - { -- struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(&sm6->p.program, 1); -+ struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(sm6->p.program, 1); - if (rel_addr) - src_param_init_from_value(rel_addr, address); - idx->offset = 0; -@@ -2336,14 +2507,18 @@ static void register_index_address_init(struct vkd3d_shader_register_index *idx, - } - } - --static void instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) -+static bool instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) - { -- struct vkd3d_shader_dst_param *param = instruction_dst_params_alloc(ins, 1, sm6); - struct sm6_value *dst = sm6_parser_get_current_value(sm6); -+ struct vkd3d_shader_dst_param *param; -+ -+ if (!(param = instruction_dst_params_alloc(ins, 1, sm6))) -+ return false; - - dst_param_init_ssa_scalar(param, dst->type, dst, sm6); - param->write_mask = VKD3DSP_WRITEMASK_0; - dst->u.reg = param->reg; -+ return true; - } - - static void instruction_dst_param_init_ssa_vector(struct vkd3d_shader_instruction *ins, -@@ -2399,7 +2574,7 @@ static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, - * overestimate the value count somewhat, but this should be no problem. */ - value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); - sm6->value_capacity = max(sm6->value_capacity, value_count); -- sm6->functions[sm6->function_count].value_count = value_count; -+ sm6->functions[sm6->function_count++].value_count = value_count; - /* The value count returns to its previous value after handling a function. 
*/ - if (value_count < SIZE_MAX) - value_count = old_value_count; -@@ -2482,6 +2657,26 @@ static bool sm6_value_validate_is_texture_handle(const struct sm6_value *value, - return true; - } - -+static bool sm6_value_validate_is_texture_2dms_handle(const struct sm6_value *value, enum dx_intrinsic_opcode op, -+ struct sm6_parser *sm6) -+{ -+ enum dxil_resource_kind kind; -+ -+ if (!sm6_value_validate_is_handle(value, sm6)) -+ return false; -+ -+ kind = value->u.handle.d->kind; -+ if (!resource_kind_is_multisampled(kind)) -+ { -+ WARN("Resource kind %u for op %u is not a 2DMS texture.\n", kind, op); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_RESOURCE_HANDLE, -+ "Resource kind %u for texture operation %u is not a 2DMS texture.", kind, op); -+ return false; -+ } -+ -+ return true; -+} -+ - static bool sm6_value_validate_is_sampler_handle(const struct sm6_value *value, enum dx_intrinsic_opcode op, - struct sm6_parser *sm6) - { -@@ -2514,6 +2709,18 @@ static bool sm6_value_validate_is_pointer(const struct sm6_value *value, struct - return true; - } - -+static bool sm6_value_validate_is_backward_ref(const struct sm6_value *value, struct sm6_parser *sm6) -+{ -+ if (!value->is_back_ref) -+ { -+ FIXME("Forward-referenced pointers are not supported.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Forward-referenced pointer declarations are not supported."); -+ return false; -+ } -+ return true; -+} -+ - static bool sm6_value_validate_is_numeric(const struct sm6_value *value, struct sm6_parser *sm6) - { - if (!sm6_type_is_numeric(value->type)) -@@ -2539,6 +2746,30 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6 - return true; - } - -+static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, struct sm6_parser *sm6) -+{ -+ if (!sm6_type_is_pointer(value->type) || !sm6_type_is_i32(value->type->u.pointer.type)) -+ { -+ WARN("Operand result type %u is not a pointer to i32.\n", value->type->class); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "An int32 pointer operand passed to a DXIL instruction is not an int32 pointer."); -+ return false; -+ } -+ return true; -+} -+ -+static bool sm6_value_validate_is_i32(const struct sm6_value *value, struct sm6_parser *sm6) -+{ -+ if (!sm6_type_is_i32(value->type)) -+ { -+ WARN("Operand result type %u is not i32.\n", value->type->class); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "An int32 operand passed to a DXIL instruction is not an int32."); -+ return false; -+ } -+ return true; -+} -+ - static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) - { - if (idx < sm6->value_count) -@@ -2686,7 +2917,7 @@ static inline uint64_t decode_rotated_signed_value(uint64_t value) - return value << 63; - } - --static inline float bitcast_uint64_to_float(uint64_t value) -+static float bitcast_uint_to_float(unsigned int value) - { - union - { -@@ -2710,6 +2941,23 @@ static inline double bitcast_uint64_to_double(uint64_t value) - return u.double_value; - } - -+static float register_get_float_value(const struct vkd3d_shader_register *reg) -+{ -+ if (!register_is_constant(reg) || !data_type_is_floating_point(reg->data_type)) -+ return 0.0; -+ -+ if (reg->dimension == VSIR_DIMENSION_VEC4) -+ WARN("Returning vec4.x.\n"); -+ -+ if (reg->type == VKD3DSPR_IMMCONST64) -+ { -+ WARN("Truncating double to float.\n"); -+ return 
bitcast_uint64_to_double(reg->u.immconst_u64[0]); -+ } -+ -+ return bitcast_uint_to_float(reg->u.immconst_u32[0]); -+} -+ - static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, const struct sm6_type *type, - const uint64_t *operands, struct sm6_parser *sm6) - { -@@ -2745,7 +2993,7 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co - "Out of memory allocating an immediate constant buffer of count %u.", count); - return VKD3D_ERROR_OUT_OF_MEMORY; - } -- if (!shader_instruction_array_add_icb(&sm6->p.program.instructions, icb)) -+ if (!shader_instruction_array_add_icb(&sm6->p.program->instructions, icb)) - { - ERR("Failed to store icb object.\n"); - vkd3d_free(icb); -@@ -2782,18 +3030,135 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co - return VKD3D_OK; - } - -+static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, const struct dxil_record *record, -+ struct sm6_value *dst) -+{ -+ const struct sm6_type *elem_type, *pointee_type, *gep_type, *ptr_type; -+ struct sm6_value *operands[3]; -+ unsigned int i, j, offset; -+ uint64_t value; -+ -+ i = 0; -+ pointee_type = (record->operand_count & 1) ? sm6_parser_get_type(sm6, record->operands[i++]) : NULL; -+ -+ if (!dxil_record_validate_operand_count(record, i + 6, i + 6, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ for (j = 0; i < record->operand_count; i += 2, ++j) -+ { -+ if (!(elem_type = sm6_parser_get_type(sm6, record->operands[i]))) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if ((value = record->operands[i + 1]) >= sm6->cur_max_value) -+ { -+ WARN("Invalid value index %"PRIu64".\n", value); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid value index %"PRIu64".", value); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ else if (value == sm6->value_count) -+ { -+ WARN("Invalid value self-reference at %"PRIu64".\n", value); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid value self-reference for a constexpr GEP."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ operands[j] = &sm6->values[value]; -+ if (value > sm6->value_count) -+ { -+ operands[j]->type = elem_type; -+ } -+ else if (operands[j]->type != elem_type) -+ { -+ WARN("Type mismatch.\n"); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, -+ "Type mismatch in constexpr GEP elements."); -+ } -+ } -+ -+ if (operands[0]->u.reg.idx_count > 1) -+ { -+ WARN("Unsupported stacked GEP.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "A GEP instruction on the result of a previous GEP is unsupported."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!sm6_value_is_constant_zero(operands[1])) -+ { -+ WARN("Expected constant zero.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "The pointer dereference index for a constexpr GEP instruction is not constant zero."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!sm6_value_is_constant(operands[2]) || !sm6_type_is_integer(operands[2]->type)) -+ { -+ WARN("Element index is not constant int.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "A constexpr GEP element index is not a constant integer."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ dst->structure_stride = operands[0]->structure_stride; -+ -+ ptr_type = operands[0]->type; -+ if (!sm6_type_is_pointer(ptr_type)) -+ { -+ WARN("Constexpr 
GEP base value is not a pointer.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "A constexpr GEP base value is not a pointer."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!pointee_type) -+ { -+ pointee_type = ptr_type->u.pointer.type; -+ } -+ else if (pointee_type != ptr_type->u.pointer.type) -+ { -+ WARN("Explicit pointee type mismatch.\n"); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, -+ "Explicit pointee type for constexpr GEP does not match the element type."); -+ } -+ -+ offset = sm6_value_get_constant_uint(operands[2]); -+ if (!(gep_type = sm6_type_get_element_type_at_index(pointee_type, offset))) -+ { -+ WARN("Failed to get element type.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Failed to get the element type of a constexpr GEP."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!(dst->type = sm6_type_get_pointer_to_type(gep_type, ptr_type->u.pointer.addr_space, sm6))) -+ { -+ WARN("Failed to get pointer type for type %u.\n", gep_type->class); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "Module does not define a pointer type for a constexpr GEP result."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ dst->u.reg = operands[0]->u.reg; -+ dst->u.reg.idx[1].offset = offset; -+ dst->u.reg.idx[1].is_in_bounds = record->code == CST_CODE_CE_INBOUNDS_GEP; -+ dst->u.reg.idx_count = 2; -+ -+ return VKD3D_OK; -+} -+ - static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) - { - enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; -- const struct sm6_type *type, *elem_type; -+ const struct sm6_type *type, *elem_type, *ptr_type; -+ size_t i, base_value_idx, value_idx; - enum vkd3d_data_type reg_data_type; - const struct dxil_record *record; -+ const struct sm6_value *src; - enum vkd3d_result ret; - struct sm6_value *dst; -- size_t i, value_idx; - uint64_t value; - -- for (i = 0, type = NULL; i < block->record_count; ++i) -+ for (i = 0, type = NULL, base_value_idx = sm6->value_count; i < block->record_count; ++i) - { - sm6->p.location.column = i; - record = block->records[i]; -@@ -2834,6 +3199,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - dst = sm6_parser_get_current_value(sm6); - dst->type = type; - dst->value_type = VALUE_TYPE_REG; -+ dst->is_back_ref = true; - vsir_register_init(&dst->u.reg, reg_type, reg_data_type, 0); - - switch (record->code) -@@ -2876,9 +3242,9 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - } - - if (type->u.width == 16) -- FIXME("Half float type is not supported yet.\n"); -+ dst->u.reg.u.immconst_u32[0] = record->operands[0]; - else if (type->u.width == 32) -- dst->u.reg.u.immconst_f32[0] = bitcast_uint64_to_float(record->operands[0]); -+ dst->u.reg.u.immconst_f32[0] = bitcast_uint_to_float(record->operands[0]); - else if (type->u.width == 64) - dst->u.reg.u.immconst_f64[0] = bitcast_uint64_to_double(record->operands[0]); - else -@@ -2902,6 +3268,54 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - - break; - -+ case CST_CODE_CE_GEP: -+ case CST_CODE_CE_INBOUNDS_GEP: -+ if ((ret = sm6_parser_init_constexpr_gep(sm6, record, dst)) < 0) -+ return ret; -+ break; -+ -+ case CST_CODE_CE_CAST: -+ if (!dxil_record_validate_operand_count(record, 3, 3, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if ((value = record->operands[0]) != 
CAST_BITCAST) -+ { -+ WARN("Unhandled constexpr cast op %"PRIu64".\n", value); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Constexpr cast op %"PRIu64" is unhandled.", value); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ ptr_type = sm6_parser_get_type(sm6, record->operands[1]); -+ if (!sm6_type_is_pointer(ptr_type)) -+ { -+ WARN("Constexpr cast at constant idx %zu is not a pointer.\n", value_idx); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Constexpr cast source operand is not a pointer."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if ((value = record->operands[2]) >= sm6->cur_max_value) -+ { -+ WARN("Invalid value index %"PRIu64".\n", value); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid value index %"PRIu64".", value); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ else if (value == value_idx) -+ { -+ WARN("Invalid value self-reference at %"PRIu64".\n", value); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid value self-reference for a constexpr cast."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ /* Resolve later in case forward refs exist. */ -+ dst->type = type; -+ dst->u.reg.type = VKD3DSPR_COUNT; -+ dst->u.reg.idx[0].offset = value; -+ break; -+ - case CST_CODE_UNDEF: - dxil_record_validate_operand_max_count(record, 0, sm6); - dst->u.reg.type = VKD3DSPR_UNDEF; -@@ -2911,6 +3325,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - - default: - FIXME("Unhandled constant code %u.\n", record->code); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Constant code %u is unhandled.", record->code); - dst->u.reg.type = VKD3DSPR_UNDEF; - break; - } -@@ -2925,6 +3341,29 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - ++sm6->value_count; - } - -+ /* Resolve cast forward refs. */ -+ for (i = base_value_idx; i < sm6->value_count; ++i) -+ { -+ dst = &sm6->values[i]; -+ if (dst->u.reg.type != VKD3DSPR_COUNT) -+ continue; -+ -+ type = dst->type; -+ -+ src = &sm6->values[dst->u.reg.idx[0].offset]; -+ if (!sm6_value_is_numeric_array(src)) -+ { -+ WARN("Value is not an array.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Constexpr cast source value is not a global array element."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ *dst = *src; -+ dst->type = type; -+ dst->u.reg.data_type = vkd3d_data_type_from_sm6_type(type->u.pointer.type); -+ } -+ - return VKD3D_OK; - } - -@@ -2941,12 +3380,14 @@ static bool bitcode_parse_alignment(uint64_t encoded_alignment, unsigned int *al - - static struct vkd3d_shader_instruction *sm6_parser_require_space(struct sm6_parser *sm6, size_t extra) - { -- if (!shader_instruction_array_reserve(&sm6->p.program.instructions, sm6->p.program.instructions.count + extra)) -+ struct vkd3d_shader_instruction_array *instructions = &sm6->p.program->instructions; -+ -+ if (!shader_instruction_array_reserve(instructions, instructions->count + extra)) - { - ERR("Failed to allocate instruction.\n"); - return NULL; - } -- return &sm6->p.program.instructions.elements[sm6->p.program.instructions.count]; -+ return &instructions->elements[instructions->count]; - } - - /* Space should be reserved before calling this. It is intended to require no checking of the returned pointer. 
*/ -@@ -2956,7 +3397,7 @@ static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_pa - struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); - assert(ins); - vsir_instruction_init(ins, &sm6->p.location, handler_idx); -- ++sm6->p.program.instructions.count; -+ ++sm6->p.program->instructions.count; - return ins; - } - -@@ -2994,6 +3435,58 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru - register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx); - } - -+static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6_type *elem_type, -+ unsigned int alignment, unsigned int init, struct sm6_value *dst) -+{ -+ enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); -+ struct vkd3d_shader_instruction *ins; -+ unsigned int byte_count; -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_RAW); -+ dst_param_init(&ins->declaration.tgsm_raw.reg); -+ register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); -+ dst->u.reg = ins->declaration.tgsm_raw.reg.reg; -+ dst->structure_stride = 0; -+ ins->declaration.tgsm_raw.alignment = alignment; -+ byte_count = elem_type->u.width / 8u; -+ if (byte_count != 4) -+ { -+ FIXME("Unsupported byte count %u.\n", byte_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Raw TGSM byte count %u is not supported.", byte_count); -+ } -+ ins->declaration.tgsm_raw.byte_count = byte_count; -+ /* The initialiser value index will be resolved later when forward references can be handled. */ -+ ins->flags = init; -+} -+ -+static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const struct sm6_type *elem_type, -+ unsigned int count, unsigned int alignment, unsigned int init, struct sm6_value *dst) -+{ -+ enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); -+ struct vkd3d_shader_instruction *ins; -+ unsigned int structure_stride; -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_STRUCTURED); -+ dst_param_init(&ins->declaration.tgsm_structured.reg); -+ register_init_with_id(&ins->declaration.tgsm_structured.reg.reg, VKD3DSPR_GROUPSHAREDMEM, -+ data_type, sm6->tgsm_count++); -+ dst->u.reg = ins->declaration.tgsm_structured.reg.reg; -+ structure_stride = elem_type->u.width / 8u; -+ if (structure_stride != 4) -+ { -+ FIXME("Unsupported structure stride %u.\n", structure_stride); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Structured TGSM byte stride %u is not supported.", structure_stride); -+ } -+ dst->structure_stride = structure_stride; -+ ins->declaration.tgsm_structured.alignment = alignment; -+ ins->declaration.tgsm_structured.byte_stride = structure_stride; -+ ins->declaration.tgsm_structured.structure_count = count; -+ /* The initialiser value index will be resolved later when forward references can be handled. 
*/ -+ ins->flags = init; -+} -+ - static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record) - { - const struct sm6_type *type, *scalar_type; -@@ -3101,6 +3594,7 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ - dst = sm6_parser_get_current_value(sm6); - dst->type = type; - dst->value_type = VALUE_TYPE_REG; -+ dst->is_back_ref = true; - - if (is_constant && !init) - { -@@ -3119,10 +3613,17 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ - } - else if (address_space == ADDRESS_SPACE_GROUPSHARED) - { -- FIXME("Unsupported TGSM.\n"); -- vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -- "TGSM global variables are not supported."); -- return false; -+ if (!sm6_type_is_numeric(scalar_type)) -+ { -+ WARN("Unsupported type class %u.\n", scalar_type->class); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "TGSM variables of type class %u are not supported.", scalar_type->class); -+ return false; -+ } -+ if (count == 1) -+ sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); -+ else -+ sm6_parser_declare_tgsm_structured(sm6, scalar_type, count, alignment, init, dst); - } - else - { -@@ -3158,17 +3659,49 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init - return NULL; - } - --static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) -+static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm6) - { -- size_t i, count, base_value_idx = sm6->value_count; -- const struct dxil_block *block = &sm6->root_block; -- struct vkd3d_shader_instruction *ins; -- const struct dxil_record *record; -- enum vkd3d_result ret; -- uint64_t version; -+ const struct sm6_value *value; - -- sm6->p.location.line = block->id; -- sm6->p.location.column = 0; -+ if (!index) -+ return false; -+ -+ --index; -+ if (!(value = sm6_parser_get_value_safe(sm6, index)) -+ || (!sm6_value_is_icb(value) && !sm6_value_is_constant(value) && !sm6_value_is_undef(value))) -+ { -+ WARN("Invalid initialiser index %zu.\n", index); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "TGSM initialiser value index %zu is invalid.", index); -+ return false; -+ } -+ else if ((sm6_value_is_icb(value) && value->u.icb->is_null) || sm6_value_is_constant_zero(value)) -+ { -+ return true; -+ } -+ else if (sm6_value_is_undef(value)) -+ { -+ /* In VSIR, initialisation with undefined values of objects is implied, not explicit. */ -+ return false; -+ } -+ -+ FIXME("Non-zero initialisers are not supported.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Non-zero TGSM initialisers are not supported."); -+ return false; -+} -+ -+static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) -+{ -+ size_t i, count, base_value_idx = sm6->value_count; -+ const struct dxil_block *block = &sm6->root_block; -+ struct vkd3d_shader_instruction *ins; -+ const struct dxil_record *record; -+ enum vkd3d_result ret; -+ uint64_t version; -+ -+ sm6->p.location.line = block->id; -+ sm6->p.location.column = 0; - - for (i = 0, count = 0; i < block->record_count; ++i) - count += block->records[i]->code == MODULE_CODE_GLOBALVAR; -@@ -3219,9 +3752,9 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) - } - - /* Resolve initialiser forward references. 
*/ -- for (i = 0; i < sm6->p.program.instructions.count; ++i) -+ for (i = 0; i < sm6->p.program->instructions.count; ++i) - { -- ins = &sm6->p.program.instructions.elements[i]; -+ ins = &sm6->p.program->instructions.elements[i]; - if (ins->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) - { - ins->declaration.indexable_temp.initialiser = resolve_forward_initialiser( -@@ -3231,6 +3764,16 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) - { - ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); - } -+ else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) -+ { -+ ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); -+ ins->flags = 0; -+ } -+ else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) -+ { -+ ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); -+ ins->flags = 0; -+ } - } - for (i = base_value_idx; i < sm6->value_count; ++i) - { -@@ -3270,22 +3813,80 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par - src_param_init_from_value(&src_params[i], operands[i]); - } - -+static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( -+ enum vkd3d_shader_sysval_semantic sysval_semantic) -+{ -+ switch (sysval_semantic) -+ { -+ case VKD3D_SHADER_SV_COVERAGE: -+ return VKD3DSPR_COVERAGE; -+ case VKD3D_SHADER_SV_DEPTH: -+ return VKD3DSPR_DEPTHOUT; -+ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: -+ return VKD3DSPR_DEPTHOUTGE; -+ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: -+ return VKD3DSPR_DEPTHOUTLE; -+ default: -+ return VKD3DSPR_INVALID; -+ } -+} -+ - static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, -- enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) -+ bool is_input, enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) - { -+ enum vkd3d_shader_type shader_type = sm6->p.program->shader_version.type; -+ bool is_patch_constant, is_control_point; - struct vkd3d_shader_dst_param *param; - const struct signature_element *e; - unsigned int i, count; - -+ is_patch_constant = reg_type == VKD3DSPR_PATCHCONST; -+ -+ is_control_point = false; -+ if (!is_patch_constant) -+ { -+ switch (shader_type) -+ { -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ case VKD3D_SHADER_TYPE_GEOMETRY: -+ is_control_point = is_input; -+ break; -+ -+ case VKD3D_SHADER_TYPE_HULL: -+ is_control_point = true; -+ break; -+ -+ default: -+ break; -+ } -+ } -+ - for (i = 0; i < s->element_count; ++i) - { - e = &s->elements[i]; - - param = ¶ms[i]; -+ -+ if (e->register_index == UINT_MAX) -+ { -+ dst_param_io_init(param, e, register_type_from_dxil_semantic_kind(e->sysval_semantic)); -+ continue; -+ } -+ - dst_param_io_init(param, e, reg_type); - count = 0; -- if (e->register_count > 1) -+ -+ if (is_control_point) -+ { -+ if (reg_type == VKD3DSPR_OUTPUT) -+ param->reg.idx[count].rel_addr = instruction_array_create_outpointid_param(&sm6->p.program->instructions); -+ param->reg.idx[count++].offset = 0; -+ } -+ -+ if (e->register_count > 1 || (is_patch_constant && vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) - param->reg.idx[count++].offset = 0; -+ -+ assert(count < ARRAY_SIZE(param->reg.idx)); - param->reg.idx[count++].offset = i; - param->reg.idx_count = count; - } -@@ -3293,12 +3894,21 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade - - static void 
sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) - { -- sm6_parser_init_signature(sm6, output_signature, VKD3DSPR_OUTPUT, sm6->output_params); -+ sm6_parser_init_signature(sm6, output_signature, false, VKD3DSPR_OUTPUT, sm6->output_params); - } - - static void sm6_parser_init_input_signature(struct sm6_parser *sm6, const struct shader_signature *input_signature) - { -- sm6_parser_init_signature(sm6, input_signature, VKD3DSPR_INPUT, sm6->input_params); -+ sm6_parser_init_signature(sm6, input_signature, true, VKD3DSPR_INPUT, sm6->input_params); -+} -+ -+static void sm6_parser_init_patch_constant_signature(struct sm6_parser *sm6, -+ const struct shader_signature *patch_constant_signature) -+{ -+ bool is_input = sm6->p.program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; -+ -+ sm6_parser_init_signature(sm6, patch_constant_signature, is_input, VKD3DSPR_PATCHCONST, -+ sm6->patch_constant_params); - } - - static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) -@@ -3350,6 +3960,9 @@ struct function_emission_state - unsigned int temp_idx; - }; - -+static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, -+ unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg); -+ - static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record, - struct vkd3d_shader_instruction *ins, struct sm6_value *dst) - { -@@ -3425,6 +4038,130 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec - sm6_parser_declare_indexable_temp(sm6, elem_type, type[0]->u.array.count, alignment, true, 0, ins, dst); - } - -+static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code) -+{ -+ switch (code) -+ { -+ case RMW_ADD: -+ return VKD3DSIH_IMM_ATOMIC_IADD; -+ case RMW_AND: -+ return VKD3DSIH_IMM_ATOMIC_AND; -+ case RMW_MAX: -+ return VKD3DSIH_IMM_ATOMIC_IMAX; -+ case RMW_MIN: -+ return VKD3DSIH_IMM_ATOMIC_IMIN; -+ case RMW_OR: -+ return VKD3DSIH_IMM_ATOMIC_OR; -+ case RMW_UMAX: -+ return VKD3DSIH_IMM_ATOMIC_UMAX; -+ case RMW_UMIN: -+ return VKD3DSIH_IMM_ATOMIC_UMIN; -+ case RMW_XCHG: -+ return VKD3DSIH_IMM_ATOMIC_EXCH; -+ case RMW_XOR: -+ return VKD3DSIH_IMM_ATOMIC_XOR; -+ default: -+ /* DXIL currently doesn't use SUB and NAND. 
*/ -+ return VKD3DSIH_INVALID; -+ } -+} -+ -+static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record, -+ struct function_emission_state *state, struct sm6_value *dst) -+{ -+ struct vkd3d_shader_register coord, const_offset, const_zero; -+ const struct vkd3d_shader_register *regs[2]; -+ struct vkd3d_shader_dst_param *dst_params; -+ struct vkd3d_shader_src_param *src_params; -+ struct vkd3d_shader_instruction *ins; -+ const struct sm6_value *ptr, *src; -+ enum vkd3d_shader_opcode op; -+ unsigned int i = 0; -+ bool is_volatile; -+ uint64_t code; -+ -+ if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) -+ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6) -+ || !sm6_value_validate_is_backward_ref(ptr, sm6)) -+ return; -+ -+ if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) -+ { -+ WARN("Register is not groupshared.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "The destination register for an atomicrmw instruction is not groupshared memory."); -+ return; -+ } -+ -+ dst->type = ptr->type->u.pointer.type; -+ -+ if (!(src = sm6_parser_get_value_by_ref(sm6, record, dst->type, &i))) -+ return; -+ -+ if (!dxil_record_validate_operand_count(record, i + 4, i + 4, sm6)) -+ return; -+ -+ if ((op = map_dx_atomicrmw_op(code = record->operands[i++])) == VKD3DSIH_INVALID) -+ { -+ FIXME("Unhandled atomicrmw op %"PRIu64".\n", code); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Operation %"PRIu64" for an atomicrmw instruction is unhandled.", code); -+ return; -+ } -+ -+ is_volatile = record->operands[i++]; -+ -+ /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ -+ if ((code = record->operands[i++]) != ORDERING_SEQCST) -+ FIXME("Unhandled atomic ordering %"PRIu64".\n", code); -+ -+ if ((code = record->operands[i]) != 1) -+ WARN("Ignoring synchronisation scope %"PRIu64".\n", code); -+ -+ if (ptr->structure_stride) -+ { -+ if (ptr->u.reg.idx[1].rel_addr) -+ { -+ regs[0] = &ptr->u.reg.idx[1].rel_addr->reg; -+ } -+ else -+ { -+ register_make_constant_uint(&const_offset, ptr->u.reg.idx[1].offset); -+ regs[0] = &const_offset; -+ } -+ register_make_constant_uint(&const_zero, 0); -+ regs[1] = &const_zero; -+ if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord)) -+ return; -+ } -+ -+ ins = state->ins; -+ vsir_instruction_init(ins, &sm6->p.location, op); -+ ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; -+ -+ if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) -+ return; -+ if (ptr->structure_stride) -+ src_param_init_vector_from_reg(&src_params[0], &coord); -+ else -+ src_param_make_constant_uint(&src_params[0], 0); -+ src_param_init_from_value(&src_params[1], src); -+ -+ dst_params = instruction_dst_params_alloc(ins, 2, sm6); -+ register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); -+ dst_param_init(&dst_params[0]); -+ -+ dst_params[1].reg = ptr->u.reg; -+ /* The groupshared register has data type UAV when accessed. 
*/ -+ dst_params[1].reg.data_type = VKD3D_DATA_UAV; -+ dst_params[1].reg.idx[1].rel_addr = NULL; -+ dst_params[1].reg.idx[1].offset = ~0u; -+ dst_params[1].reg.idx_count = 1; -+ dst_param_init(&dst_params[1]); -+ -+ dst->u.reg = dst_params[0].reg; -+} -+ - static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, - const struct sm6_type *type_b, struct sm6_parser *sm6) - { -@@ -3756,6 +4493,25 @@ static bool sm6_parser_emit_coordinate_construct(struct sm6_parser *sm6, const s - return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg); - } - -+static enum vkd3d_shader_opcode sm6_dx_map_void_op(enum dx_intrinsic_opcode op) -+{ -+ switch (op) -+ { -+ case DX_WAVE_IS_FIRST_LANE: -+ return VKD3DSIH_WAVE_IS_FIRST_LANE; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void sm6_parser_emit_dx_void(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ vsir_instruction_init(ins, &sm6->p.location, sm6_dx_map_void_op(op)); -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) - { - switch (op) -@@ -3820,6 +4576,18 @@ static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) - return VKD3DSIH_F32TOF16; - case DX_LEGACY_F16TOF32: - return VKD3DSIH_F16TOF32; -+ case DX_WAVE_ACTIVE_ALL_EQUAL: -+ return VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL; -+ case DX_WAVE_ALL_BIT_COUNT: -+ return VKD3DSIH_WAVE_ALL_BIT_COUNT; -+ case DX_WAVE_ALL_TRUE: -+ return VKD3DSIH_WAVE_ALL_TRUE; -+ case DX_WAVE_ANY_TRUE: -+ return VKD3DSIH_WAVE_ANY_TRUE; -+ case DX_WAVE_PREFIX_BIT_COUNT: -+ return VKD3DSIH_WAVE_PREFIX_BIT_COUNT; -+ case DX_WAVE_READ_LANE_FIRST: -+ return VKD3DSIH_WAVE_READ_LANE_FIRST; - default: - vkd3d_unreachable(); - } -@@ -3855,6 +4623,8 @@ static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co - return VKD3DSIH_UMAX; - case DX_UMIN: - return VKD3DSIH_UMIN; -+ case DX_WAVE_READ_LANE_AT: -+ return VKD3DSIH_WAVE_READ_LANE_AT; - default: - vkd3d_unreachable(); - } -@@ -3974,6 +4744,98 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr - dst->u.reg = dst_params[0].reg; - } - -+static void sm6_parser_emit_dx_barrier(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ enum dxil_sync_flags flags; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SYNC); -+ flags = sm6_value_get_constant_uint(operands[0]); -+ ins->flags = flags & (SYNC_THREAD_GROUP | SYNC_THREAD_GROUP_UAV); -+ if (flags & SYNC_GLOBAL_UAV) -+ ins->flags |= VKD3DSSF_GLOBAL_UAV; -+ if (flags & SYNC_GROUP_SHARED_MEMORY) -+ ins->flags |= VKD3DSSF_GROUP_SHARED_MEMORY; -+ if (flags &= ~(SYNC_THREAD_GROUP | SYNC_GLOBAL_UAV | SYNC_THREAD_GROUP_UAV | SYNC_GROUP_SHARED_MEMORY)) -+ { -+ FIXME("Unhandled flags %#x.\n", flags); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Barrier flags %#x are unhandled.", flags); -+ } -+} -+ -+static void sm6_parser_emit_dx_buffer_update_counter(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_params; -+ 
const struct sm6_value *resource; -+ unsigned int i; -+ int8_t inc; -+ -+ resource = operands[0]; -+ if (!sm6_value_validate_is_handle(resource, sm6)) -+ return; -+ -+ if (!sm6_value_is_constant(operands[1])) -+ { -+ FIXME("Unsupported dynamic update operand.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "A dynamic update value for a UAV counter operation is not supported."); -+ return; -+ } -+ i = sm6_value_get_constant_uint(operands[1]); -+ if (i != 1 && i != 255) -+ { -+ WARN("Unexpected update value %#x.\n", i); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Update value %#x for a UAV counter operation is not supported.", i); -+ } -+ inc = i; -+ -+ vsir_instruction_init(ins, &sm6->p.location, (inc < 0) ? VKD3DSIH_IMM_ATOMIC_CONSUME : VKD3DSIH_IMM_ATOMIC_ALLOC); -+ if (!(src_params = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_vector_from_reg(&src_params[0], &resource->u.handle.reg); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static void sm6_parser_emit_dx_calculate_lod(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ const struct sm6_value *resource, *sampler; -+ struct vkd3d_shader_src_param *src_params; -+ struct vkd3d_shader_instruction *ins; -+ struct vkd3d_shader_register coord; -+ unsigned int clamp; -+ -+ resource = operands[0]; -+ sampler = operands[1]; -+ if (!sm6_value_validate_is_texture_handle(resource, op, sm6) -+ || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) -+ { -+ return; -+ } -+ -+ if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], 3, NULL, state, &coord)) -+ return; -+ -+ clamp = sm6_value_get_constant_uint(operands[5]); -+ -+ ins = state->ins; -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LOD); -+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -+ return; -+ src_param_init_vector_from_reg(&src_params[0], &coord); -+ src_params[1].reg = resource->u.handle.reg; -+ src_param_init_scalar(&src_params[1], !clamp); -+ src_param_init_vector_from_reg(&src_params[2], &sampler->u.handle.reg); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -4004,6 +4866,44 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr - instruction_dst_param_init_ssa_vector(ins, sm6_type_max_vector_size(type), sm6); - } - -+static void sm6_parser_dcl_register_builtin(struct sm6_parser *sm6, enum vkd3d_shader_opcode handler_idx, -+ enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type, unsigned int component_count) -+{ -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!bitmap_is_set(sm6->io_regs_declared, reg_type)) -+ { -+ bitmap_set(sm6->io_regs_declared, reg_type); -+ ins = sm6_parser_add_instruction(sm6, handler_idx); -+ dst_param = &ins->declaration.dst; -+ vsir_register_init(&dst_param->reg, reg_type, data_type, 0); -+ dst_param_init_vector(dst_param, component_count); -+ } -+} -+ -+static void sm6_parser_emit_dx_input_register_mov(struct sm6_parser *sm6, -+ struct vkd3d_shader_instruction *ins, enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type) -+{ -+ struct vkd3d_shader_src_param *src_param; -+ -+ 
vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_INPUT, reg_type, data_type, 1); -+ vsir_register_init(&src_param->reg, reg_type, data_type, 0); -+ src_param_init(src_param); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static void sm6_parser_emit_dx_coverage(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_COVERAGE, VKD3D_DATA_UINT); -+} -+ - static const struct sm6_descriptor_info *sm6_parser_get_descriptor(struct sm6_parser *sm6, - enum vkd3d_shader_descriptor_type type, unsigned int id, const struct sm6_value *address) - { -@@ -4065,31 +4965,233 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int - ins->handler_idx = VKD3DSIH_NOP; - } - --static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+static void sm6_parser_emit_dx_stream(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { - struct vkd3d_shader_instruction *ins = state->ins; -- struct vkd3d_shader_src_param *src_params; -- unsigned int is_texture, component_count; -- enum dxil_resource_kind resource_kind; -- struct vkd3d_shader_dst_param *dst; -- const struct sm6_value *resource; -- -- resource = operands[0]; -- if (!sm6_value_validate_is_handle(resource, sm6)) -- return; -- is_texture = resource->u.handle.d->resource_type != VKD3D_SHADER_RESOURCE_BUFFER; -- resource_kind = resource->u.handle.d->kind; -+ struct vkd3d_shader_src_param *src_param; -+ unsigned int i; - -- instruction_init_with_resource(ins, is_texture ? VKD3DSIH_RESINFO : VKD3DSIH_BUFINFO, resource, sm6); -+ vsir_instruction_init(ins, &sm6->p.location, (op == DX_CUT_STREAM) ? VKD3DSIH_CUT_STREAM : VKD3DSIH_EMIT_STREAM); - -- if (!(src_params = instruction_src_params_alloc(ins, 1 + is_texture, sm6))) -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) - return; -- src_param_init_vector_from_reg(&src_params[is_texture], &resource->u.handle.reg); - -- if (is_texture) -+ i = sm6_value_get_constant_uint(operands[0]); -+ if (i >= MAX_GS_OUTPUT_STREAMS) - { -- ins->flags = VKD3DSI_RESINFO_UINT; -+ WARN("Invalid stream index %u.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Output stream index %u is invalid.", i); -+ } -+ -+ /* VKD3D_DATA_UNUSED would be more reasonable, but TPF uses data type 0 here. 
*/ -+ register_init_with_id(&src_param->reg, VKD3DSPR_STREAM, 0, i); -+ src_param_init(src_param); -+ -+ if (op == DX_EMIT_THEN_CUT_STREAM) -+ { -+ ++state->ins; -+ ++state->code_block->instruction_count; -+ sm6_parser_emit_dx_stream(sm6, DX_CUT_STREAM, operands, state); -+ } -+} -+ -+static void sm6_parser_emit_dx_discard(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_DISCARD); -+ -+ if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ src_param_init_from_value(src_param, operands[0]); -+} -+ -+static void sm6_parser_emit_dx_domain_location(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ unsigned int component_idx; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ -+ if ((component_idx = sm6_value_get_constant_uint(operands[0])) >= 3) -+ { -+ WARN("Invalid component index %u.\n", component_idx); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid domain location component index %u.", component_idx); -+ component_idx = 0; -+ } -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_INPUT, VKD3DSPR_TESSCOORD, VKD3D_DATA_FLOAT, 3); -+ vsir_register_init(&src_param->reg, VKD3DSPR_TESSCOORD, VKD3D_DATA_FLOAT, 0); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ src_param_init_scalar(src_param, component_idx); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_src_param *src_params; -+ struct vkd3d_shader_instruction *ins; -+ struct vkd3d_shader_register regs[2]; -+ enum vkd3d_shader_opcode handler_idx; -+ unsigned int component_count; -+ -+ switch (op) -+ { -+ case DX_DOT2: -+ handler_idx = VKD3DSIH_DP2; -+ component_count = 2; -+ break; -+ case DX_DOT3: -+ handler_idx = VKD3DSIH_DP3; -+ component_count = 3; -+ break; -+ case DX_DOT4: -+ handler_idx = VKD3DSIH_DP4; -+ component_count = 4; -+ break; -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (!sm6_parser_emit_composite_construct(sm6, &operands[0], component_count, state, ®s[0])) -+ return; -+ if (!sm6_parser_emit_composite_construct(sm6, &operands[component_count], component_count, state, ®s[1])) -+ return; -+ -+ ins = state->ins; -+ vsir_instruction_init(ins, &sm6->p.location, handler_idx); -+ if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) -+ return; -+ src_param_init_vector_from_reg(&src_params[0], ®s[0]); -+ src_param_init_vector_from_reg(&src_params[1], ®s[1]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ 
src_param_init_from_value(src_param, operands[0]); -+ src_param->modifiers = VKD3DSPSM_ABS; -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static void sm6_parser_emit_dx_compute_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ unsigned int component_count = 3, component_idx = 0; -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ enum vkd3d_shader_register_type reg_type; -+ -+ switch (op) -+ { -+ case DX_THREAD_ID: -+ reg_type = VKD3DSPR_THREADID; -+ break; -+ case DX_GROUP_ID: -+ reg_type = VKD3DSPR_THREADGROUPID; -+ break; -+ case DX_THREAD_ID_IN_GROUP: -+ reg_type = VKD3DSPR_LOCALTHREADID; -+ break; -+ case DX_FLATTENED_THREAD_ID_IN_GROUP: -+ reg_type = VKD3DSPR_LOCALTHREADINDEX; -+ component_count = 1; -+ break; -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_INPUT, reg_type, VKD3D_DATA_UINT, component_count); -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ vsir_register_init(&src_param->reg, reg_type, VKD3D_DATA_UINT, 0); -+ if (component_count > 1) -+ { -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ component_idx = sm6_value_get_constant_uint(operands[0]); -+ } -+ src_param_init_scalar(src_param, component_idx); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static enum vkd3d_shader_opcode sm6_dx_map_ma_op(enum dx_intrinsic_opcode op, const struct sm6_type *type) -+{ -+ switch (op) -+ { -+ case DX_FMA: -+ return VKD3DSIH_DFMA; -+ case DX_FMAD: -+ return VKD3DSIH_MAD; -+ case DX_IMAD: -+ case DX_UMAD: -+ return VKD3DSIH_IMAD; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void sm6_parser_emit_dx_ma(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_params; -+ unsigned int i; -+ -+ vsir_instruction_init(ins, &sm6->p.location, sm6_dx_map_ma_op(op, operands[0]->type)); -+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -+ return; -+ for (i = 0; i < 3; ++i) -+ src_param_init_from_value(&src_params[i], operands[i]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_params; -+ unsigned int is_texture, component_count; -+ enum dxil_resource_kind resource_kind; -+ struct vkd3d_shader_dst_param *dst; -+ const struct sm6_value *resource; -+ -+ resource = operands[0]; -+ if (!sm6_value_validate_is_handle(resource, sm6)) -+ return; -+ is_texture = resource->u.handle.d->resource_type != VKD3D_SHADER_RESOURCE_BUFFER; -+ resource_kind = resource->u.handle.d->kind; -+ -+ instruction_init_with_resource(ins, is_texture ? 
VKD3DSIH_RESINFO : VKD3DSIH_BUFINFO, resource, sm6); -+ -+ if (!(src_params = instruction_src_params_alloc(ins, 1 + is_texture, sm6))) -+ return; -+ src_param_init_vector_from_reg(&src_params[is_texture], &resource->u.handle.reg); -+ -+ if (is_texture) -+ { -+ ins->flags = VKD3DSI_RESINFO_UINT; - src_param_init_from_value(&src_params[0], operands[1]); - component_count = VKD3D_VEC4_SIZE; - -@@ -4171,18 +5273,44 @@ static void sm6_parser_emit_dx_tertiary(struct sm6_parser *sm6, enum dx_intrinsi - static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -+ bool is_control_point = op == DX_LOAD_OUTPUT_CONTROL_POINT; -+ bool is_patch_constant = op == DX_LOAD_PATCH_CONSTANT; - struct vkd3d_shader_instruction *ins = state->ins; -+ struct vsir_program *program = sm6->p.program; -+ unsigned int count, row_index, column_index; -+ const struct vkd3d_shader_dst_param *params; - struct vkd3d_shader_src_param *src_param; - const struct shader_signature *signature; -- unsigned int row_index, column_index; - const struct signature_element *e; - - row_index = sm6_value_get_constant_uint(operands[0]); - column_index = sm6_value_get_constant_uint(operands[2]); - -+ if (is_control_point && operands[3]->is_undefined) -+ { -+ /* dxcompiler will compile source which does this, so let it pass. */ -+ WARN("Control point id is undefined.\n"); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNDEFINED_OPERAND, -+ "The index for a control point load is undefined."); -+ } -+ - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); - -- signature = &sm6->p.shader_desc.input_signature; -+ if (is_patch_constant) -+ { -+ signature = &program->patch_constant_signature; -+ params = sm6->patch_constant_params; -+ } -+ else if (is_control_point) -+ { -+ signature = &program->output_signature; -+ params = sm6->output_params; -+ } -+ else -+ { -+ signature = &program->input_signature; -+ params = sm6->input_params; -+ } - if (row_index >= signature->element_count) - { - WARN("Invalid row index %u.\n", row_index); -@@ -4194,14 +5322,54 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin - - if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) - return; -- src_param->reg = sm6->input_params[row_index].reg; -+ src_param->reg = params[row_index].reg; - src_param_init_scalar(src_param, column_index); -+ count = 0; -+ - if (e->register_count > 1) -- register_index_address_init(&src_param->reg.idx[0], operands[1], sm6); -+ register_index_address_init(&src_param->reg.idx[count++], operands[1], sm6); -+ -+ if (!is_patch_constant && !operands[3]->is_undefined) -+ { -+ assert(src_param->reg.idx_count > count); -+ register_index_address_init(&src_param->reg.idx[count], operands[3], sm6); -+ } -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static void sm6_parser_emit_dx_make_double(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_src_param *src_params; -+ struct vkd3d_shader_instruction *ins; -+ struct vkd3d_shader_register reg; -+ -+ if (!sm6_parser_emit_composite_construct(sm6, &operands[0], 2, state, ®)) -+ return; -+ -+ ins = state->ins; -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (!(src_params = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_params[0].reg = reg; -+ 
src_param_init_vector(&src_params[0], 2); - - instruction_dst_param_init_ssa_scalar(ins, sm6); - } - -+static void sm6_parser_emit_dx_output_control_point_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT); -+} -+ -+static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_PRIMID, VKD3D_DATA_UINT); -+} -+ - static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -4424,6 +5592,59 @@ static void sm6_parser_emit_dx_buffer_store(struct sm6_parser *sm6, enum dx_intr - dst_param->reg = resource->u.handle.reg; - } - -+static void sm6_parser_emit_dx_get_sample_count(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SAMPLE_INFO); -+ ins->flags = VKD3DSI_SAMPLE_INFO_UINT; -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ vsir_register_init(&src_param->reg, VKD3DSPR_RASTERIZER, VKD3D_DATA_FLOAT, 0); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ src_param_init(src_param); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+ ins->dst->reg.data_type = VKD3D_DATA_FLOAT; -+} -+ -+static void sm6_parser_emit_dx_get_sample_pos(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_params; -+ const struct sm6_value *resource = NULL; -+ -+ if (op == DX_TEX2DMS_GET_SAMPLE_POS) -+ { -+ resource = operands[0]; -+ if (!sm6_value_validate_is_texture_2dms_handle(resource, op, sm6)) -+ return; -+ } -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SAMPLE_POS); -+ -+ if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) -+ return; -+ if (op == DX_TEX2DMS_GET_SAMPLE_POS) -+ { -+ src_param_init_vector_from_reg(&src_params[0], &resource->u.handle.reg); -+ src_param_init_from_value(&src_params[1], operands[1]); -+ } -+ else -+ { -+ src_param_init_vector(&src_params[0], 2); -+ vsir_register_init(&src_params[0].reg, VKD3DSPR_RASTERIZER, VKD3D_DATA_FLOAT, 0); -+ src_params[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ src_param_init_from_value(&src_params[1], operands[0]); -+ } -+ -+ instruction_dst_param_init_ssa_vector(ins, 2, sm6); -+} -+ - static unsigned int sm6_value_get_texel_offset(const struct sm6_value *value) - { - return sm6_value_is_undef(value) ? 
0 : sm6_value_get_constant_uint(value); -@@ -4521,6 +5742,21 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ - instruction_dst_param_init_ssa_vector(ins, component_count, sm6); - } - -+static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ -+ if (instruction_dst_param_init_ssa_scalar(ins, sm6)) -+ ins->dst->modifiers = VKD3DSPDM_SATURATE; -+} -+ - static void sm6_parser_emit_dx_sincos(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -4561,7 +5797,9 @@ static void sm6_parser_emit_dx_split_double(struct sm6_parser *sm6, enum dx_intr - static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -+ bool is_patch_constant = op == DX_STORE_PATCH_CONSTANT; - struct vkd3d_shader_instruction *ins = state->ins; -+ struct vsir_program *program = sm6->p.program; - struct vkd3d_shader_src_param *src_param; - struct vkd3d_shader_dst_param *dst_param; - const struct shader_signature *signature; -@@ -4572,7 +5810,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr - row_index = sm6_value_get_constant_uint(operands[0]); - column_index = sm6_value_get_constant_uint(operands[2]); - -- signature = &sm6->p.shader_desc.output_signature; -+ signature = is_patch_constant ? &program->patch_constant_signature : &program->output_signature; - if (row_index >= signature->element_count) - { - WARN("Invalid row index %u.\n", row_index); -@@ -4604,14 +5842,82 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr - if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) - return; - dst_param_init_scalar(dst_param, column_index); -- dst_param->reg = sm6->output_params[row_index].reg; -+ dst_param->reg = is_patch_constant ? 
sm6->patch_constant_params[row_index].reg : sm6->output_params[row_index].reg; - if (e->register_count > 1) - register_index_address_init(&dst_param->reg.idx[0], operands[1], sm6); - -+ if (e->register_index == UINT_MAX) -+ { -+ sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_OUTPUT, dst_param->reg.type, -+ dst_param->reg.data_type, vsir_write_mask_component_count(e->mask)); -+ } -+ - if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) - src_param_init_from_value(src_param, value); - } - -+static void sm6_parser_emit_dx_texture_gather(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_register coord, offset; -+ const struct sm6_value *resource, *sampler; -+ struct vkd3d_shader_src_param *src_params; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int swizzle; -+ bool extended_offset; -+ -+ resource = operands[0]; -+ sampler = operands[1]; -+ if (!sm6_value_validate_is_texture_handle(resource, op, sm6) -+ || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) -+ { -+ return; -+ } -+ -+ if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], VKD3D_VEC4_SIZE, NULL, state, &coord)) -+ return; -+ -+ if ((extended_offset = !sm6_value_vector_is_constant_or_undef(&operands[6], 2)) -+ && !sm6_parser_emit_coordinate_construct(sm6, &operands[6], 2, NULL, state, &offset)) -+ { -+ return; -+ } -+ -+ ins = state->ins; -+ if (op == DX_TEXTURE_GATHER) -+ { -+ instruction_init_with_resource(ins, extended_offset ? VKD3DSIH_GATHER4_PO : VKD3DSIH_GATHER4, resource, sm6); -+ if (!(src_params = instruction_src_params_alloc(ins, 3 + extended_offset, sm6))) -+ return; -+ } -+ else -+ { -+ instruction_init_with_resource(ins, extended_offset ? VKD3DSIH_GATHER4_PO_C : VKD3DSIH_GATHER4_C, resource, sm6); -+ if (!(src_params = instruction_src_params_alloc(ins, 4 + extended_offset, sm6))) -+ return; -+ src_param_init_from_value(&src_params[3 + extended_offset], operands[9]); -+ } -+ -+ src_param_init_vector_from_reg(&src_params[0], &coord); -+ if (extended_offset) -+ src_param_init_vector_from_reg(&src_params[1], &offset); -+ else -+ instruction_set_texel_offset(ins, &operands[6], sm6); -+ src_param_init_vector_from_reg(&src_params[1 + extended_offset], &resource->u.handle.reg); -+ src_param_init_vector_from_reg(&src_params[2 + extended_offset], &sampler->u.handle.reg); -+ /* Swizzle stored in the sampler parameter is the scalar component index to be gathered. 
*/ -+ swizzle = sm6_value_get_constant_uint(operands[8]); -+ if (swizzle >= VKD3D_VEC4_SIZE) -+ { -+ WARN("Invalid swizzle %#x.\n", swizzle); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Swizzle %#x for a texture gather operation is invalid.", swizzle); -+ } -+ src_params[2 + extended_offset].swizzle = swizzle; -+ -+ instruction_dst_param_init_ssa_vector(ins, VKD3D_VEC4_SIZE, sm6); -+} -+ - static void sm6_parser_emit_dx_texture_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -4707,6 +6013,131 @@ static void sm6_parser_emit_dx_texture_store(struct sm6_parser *sm6, enum dx_int - dst_param_init_with_mask(dst_param, write_mask); - } - -+static void sm6_parser_emit_dx_wave_active_ballot(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_WAVE_ACTIVE_BALLOT); -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ -+ instruction_dst_param_init_ssa_vector(ins, VKD3D_VEC4_SIZE, sm6); -+} -+ -+static enum vkd3d_shader_opcode sm6_dx_map_wave_bit_op(enum dxil_wave_bit_op_kind op, -+ struct sm6_parser *sm6) -+{ -+ switch (op) -+ { -+ case WAVE_BIT_OP_AND: -+ return VKD3DSIH_WAVE_ACTIVE_BIT_AND; -+ case WAVE_BIT_OP_OR: -+ return VKD3DSIH_WAVE_ACTIVE_BIT_OR; -+ case WAVE_BIT_OP_XOR: -+ return VKD3DSIH_WAVE_ACTIVE_BIT_XOR; -+ default: -+ FIXME("Unhandled wave bit op %u.\n", op); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, -+ "Wave bit operation %u is unhandled.\n", op); -+ return VKD3DSIH_INVALID; -+ } -+} -+ -+static void sm6_parser_emit_dx_wave_active_bit(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ enum dxil_wave_bit_op_kind wave_op; -+ enum vkd3d_shader_opcode opcode; -+ -+ wave_op = sm6_value_get_constant_uint(operands[1]); -+ -+ if ((opcode = sm6_dx_map_wave_bit_op(wave_op, sm6)) == VKD3DSIH_INVALID) -+ return; -+ vsir_instruction_init(ins, &sm6->p.location, opcode); -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static enum vkd3d_shader_opcode sm6_dx_map_wave_op(enum dxil_wave_op_kind op, bool is_signed, bool is_float, -+ struct sm6_parser *sm6) -+{ -+ switch (op) -+ { -+ case WAVE_OP_ADD: -+ return VKD3DSIH_WAVE_OP_ADD; -+ case WAVE_OP_MUL: -+ return VKD3DSIH_WAVE_OP_MUL; -+ case WAVE_OP_MIN: -+ if (is_float) -+ return VKD3DSIH_WAVE_OP_MIN; -+ return is_signed ? VKD3DSIH_WAVE_OP_IMIN : VKD3DSIH_WAVE_OP_UMIN; -+ case WAVE_OP_MAX: -+ if (is_float) -+ return VKD3DSIH_WAVE_OP_MAX; -+ return is_signed ? 
VKD3DSIH_WAVE_OP_IMAX : VKD3DSIH_WAVE_OP_UMAX; -+ default: -+ FIXME("Unhandled wave op %u.\n", op); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, -+ "Wave operation %u is unhandled.\n", op); -+ return VKD3DSIH_INVALID; -+ } -+} -+ -+static void sm6_parser_emit_dx_wave_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ enum vkd3d_shader_opcode opcode; -+ enum dxil_wave_op_kind wave_op; -+ bool is_signed; -+ -+ wave_op = sm6_value_get_constant_uint(operands[1]); -+ is_signed = !sm6_value_get_constant_uint(operands[2]); -+ opcode = sm6_dx_map_wave_op(wave_op, is_signed, sm6_type_is_floating_point(operands[0]->type), sm6); -+ -+ if (opcode == VKD3DSIH_INVALID) -+ return; -+ -+ vsir_instruction_init(ins, &sm6->p.location, opcode); -+ ins->flags = (op == DX_WAVE_PREFIX_OP) ? VKD3DSI_WAVE_PREFIX : 0; -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static void sm6_parser_emit_dx_wave_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ enum vkd3d_shader_register_type type; -+ -+ switch (op) -+ { -+ case DX_WAVE_GET_LANE_COUNT: -+ type = VKD3DSPR_WAVELANECOUNT; -+ break; -+ case DX_WAVE_GET_LANE_INDEX: -+ type = VKD3DSPR_WAVELANEINDEX; -+ break; -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, type, VKD3D_DATA_UINT); -+} -+ - struct sm6_dx_opcode_info - { - const char *ret_type; -@@ -4723,6 +6154,7 @@ struct sm6_dx_opcode_info - C -> constant or undefined int8/16/32 - i -> int32 - m -> int16/32/64 -+ n -> any numeric - f -> float - d -> double - e -> half/float -@@ -4730,6 +6162,7 @@ struct sm6_dx_opcode_info - H -> handle - D -> Dimensions - S -> splitdouble -+ V -> 4 x i32 - v -> void - o -> overloaded - R -> matches the return type -@@ -4741,29 +6174,47 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_ATAN ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_ATOMIC_BINOP ] = {"o", "HciiiR", sm6_parser_emit_dx_atomic_binop}, - [DX_ATOMIC_CMP_XCHG ] = {"o", "HiiiRR", sm6_parser_emit_dx_atomic_binop}, -+ [DX_BARRIER ] = {"v", "c", sm6_parser_emit_dx_barrier}, - [DX_BFREV ] = {"m", "R", sm6_parser_emit_dx_unary}, - [DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load}, - [DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store}, -+ [DX_BUFFER_UPDATE_COUNTER ] = {"i", "H8", sm6_parser_emit_dx_buffer_update_counter}, -+ [DX_CALCULATE_LOD ] = {"f", "HHfffb", sm6_parser_emit_dx_calculate_lod}, - [DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, - [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, - [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, -+ [DX_COVERAGE ] = {"i", "", sm6_parser_emit_dx_coverage}, - [DX_CREATE_HANDLE ] = {"H", "ccib", sm6_parser_emit_dx_create_handle}, -+ [DX_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, - [DX_DERIV_COARSEX ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DERIV_COARSEY ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DERIV_FINEX ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DERIV_FINEY ] = {"e", "R", sm6_parser_emit_dx_unary}, -+ [DX_DISCARD ] = {"v", "1", 
sm6_parser_emit_dx_discard}, -+ [DX_DOMAIN_LOCATION ] = {"f", "c", sm6_parser_emit_dx_domain_location}, -+ [DX_DOT2 ] = {"g", "RRRR", sm6_parser_emit_dx_dot}, -+ [DX_DOT3 ] = {"g", "RRRRRR", sm6_parser_emit_dx_dot}, -+ [DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot}, -+ [DX_EMIT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, -+ [DX_EMIT_THEN_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, - [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, -+ [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, - [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, - [DX_FIRST_BIT_LO ] = {"i", "m", sm6_parser_emit_dx_unary}, - [DX_FIRST_BIT_SHI ] = {"i", "m", sm6_parser_emit_dx_unary}, -+ [DX_FLATTENED_THREAD_ID_IN_GROUP ] = {"i", "", sm6_parser_emit_dx_compute_builtin}, -+ [DX_FMA ] = {"g", "RRR", sm6_parser_emit_dx_ma}, -+ [DX_FMAD ] = {"g", "RRR", sm6_parser_emit_dx_ma}, - [DX_FMAX ] = {"g", "RR", sm6_parser_emit_dx_binary}, - [DX_FMIN ] = {"g", "RR", sm6_parser_emit_dx_binary}, - [DX_FRC ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_GET_DIMENSIONS ] = {"D", "Hi", sm6_parser_emit_dx_get_dimensions}, -+ [DX_GROUP_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, - [DX_IBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, - [DX_HCOS ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_HSIN ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_HTAN ] = {"g", "R", sm6_parser_emit_dx_unary}, -+ [DX_IMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, - [DX_IMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, - [DX_IMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, - [DX_ISFINITE ] = {"1", "g", sm6_parser_emit_dx_unary}, -@@ -4772,7 +6223,12 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_LEGACY_F16TOF32 ] = {"f", "i", sm6_parser_emit_dx_unary}, - [DX_LEGACY_F32TOF16 ] = {"i", "f", sm6_parser_emit_dx_unary}, - [DX_LOAD_INPUT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, -+ [DX_LOAD_OUTPUT_CONTROL_POINT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, -+ [DX_LOAD_PATCH_CONSTANT ] = {"o", "ii8", sm6_parser_emit_dx_load_input}, - [DX_LOG ] = {"g", "R", sm6_parser_emit_dx_unary}, -+ [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, -+ [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, -+ [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, - [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, - [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, - [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, -@@ -4780,22 +6236,46 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_ROUND_PI ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_ROUND_Z ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_RSQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, -+ [DX_RT_GET_SAMPLE_COUNT ] = {"i", "", sm6_parser_emit_dx_get_sample_count}, -+ [DX_RT_GET_SAMPLE_POS ] = {"o", "i", sm6_parser_emit_dx_get_sample_pos}, - [DX_SAMPLE ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_B ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_C ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_C_LZ ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_GRAD ] = {"o", "HHffffiiifffffff", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_LOD ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, -+ [DX_SATURATE ] = {"g", "R", sm6_parser_emit_dx_saturate}, - [DX_SIN ] = {"g", "R", sm6_parser_emit_dx_sincos}, - [DX_SPLIT_DOUBLE ] = {"S", "d", 
sm6_parser_emit_dx_split_double}, - [DX_SQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_STORE_OUTPUT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, -+ [DX_STORE_PATCH_CONSTANT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, - [DX_TAN ] = {"g", "R", sm6_parser_emit_dx_unary}, -+ [DX_TEX2DMS_GET_SAMPLE_POS ] = {"o", "Hi", sm6_parser_emit_dx_get_sample_pos}, -+ [DX_TEXTURE_GATHER ] = {"o", "HHffffiic", sm6_parser_emit_dx_texture_gather}, -+ [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", sm6_parser_emit_dx_texture_gather}, - [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, - [DX_TEXTURE_STORE ] = {"v", "Hiiiooooc", sm6_parser_emit_dx_texture_store}, -+ [DX_THREAD_ID ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, -+ [DX_THREAD_ID_IN_GROUP ] = {"i", "c", sm6_parser_emit_dx_compute_builtin}, - [DX_UBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, -+ [DX_UMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, - [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, - [DX_UMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, -+ [DX_WAVE_ACTIVE_ALL_EQUAL ] = {"1", "n", sm6_parser_emit_dx_unary}, -+ [DX_WAVE_ACTIVE_BALLOT ] = {"V", "1", sm6_parser_emit_dx_wave_active_ballot}, -+ [DX_WAVE_ACTIVE_BIT ] = {"m", "Rc", sm6_parser_emit_dx_wave_active_bit}, -+ [DX_WAVE_ACTIVE_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, -+ [DX_WAVE_ALL_BIT_COUNT ] = {"i", "1", sm6_parser_emit_dx_unary}, -+ [DX_WAVE_ALL_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, -+ [DX_WAVE_ANY_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, -+ [DX_WAVE_GET_LANE_COUNT ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, -+ [DX_WAVE_GET_LANE_INDEX ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, -+ [DX_WAVE_IS_FIRST_LANE ] = {"1", "", sm6_parser_emit_dx_void}, -+ [DX_WAVE_PREFIX_BIT_COUNT ] = {"i", "1", sm6_parser_emit_dx_unary}, -+ [DX_WAVE_PREFIX_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, -+ [DX_WAVE_READ_LANE_AT ] = {"n", "Ri", sm6_parser_emit_dx_binary}, -+ [DX_WAVE_READ_LANE_FIRST ] = {"n", "R", sm6_parser_emit_dx_unary}, - }; - - static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_value *value, char info_type, -@@ -4827,6 +6307,8 @@ static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struc - return sm6_type_is_i32(type); - case 'm': - return sm6_type_is_i16_i32_i64(type); -+ case 'n': -+ return sm6_type_is_numeric(type); - case 'f': - return sm6_type_is_float(type); - case 'd': -@@ -4841,6 +6323,8 @@ static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struc - return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.Dimensions"); - case 'S': - return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.splitdouble"); -+ case 'V': -+ return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.fouri32"); - case 'v': - return !type; - case 'o': -@@ -5055,7 +6539,10 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ - break; - case CAST_ZEXT: - case CAST_SEXT: -- /* nop or min precision. TODO: native 16-bit */ -+ /* nop or min precision. TODO: native 16-bit. -+ * Extension instructions could be emitted for min precision, but in Windows -+ * the AMD RX 580 simply drops such instructions, which makes sense as no -+ * assumptions should be made about any behaviour which depends on bit width. 
*/ - if (to->u.width == from->u.width || (to->u.width == 32 && from->u.width == 16)) - { - op = VKD3DSIH_NOP; -@@ -5187,8 +6674,8 @@ static const struct sm6_cmp_info *sm6_map_cmp2_op(uint64_t code) - [FCMP_OLT] = {VKD3DSIH_LTO}, - [FCMP_OLE] = {VKD3DSIH_GEO, true}, - [FCMP_ONE] = {VKD3DSIH_NEO}, -- [FCMP_ORD] = {VKD3DSIH_INVALID}, -- [FCMP_UNO] = {VKD3DSIH_INVALID}, -+ [FCMP_ORD] = {VKD3DSIH_ORD}, -+ [FCMP_UNO] = {VKD3DSIH_UNO}, - [FCMP_UEQ] = {VKD3DSIH_EQU}, - [FCMP_UGT] = {VKD3DSIH_LTU, true}, - [FCMP_UGE] = {VKD3DSIH_GEU}, -@@ -5248,6 +6735,15 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor - - code = record->operands[i++]; - -+ /* dxcompiler occasionally emits bool not-equal-to-false, which is a no-op. Bool comparisons -+ * do not otherwise occur, so deleting these avoids the need for backend support. */ -+ if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) -+ { -+ ins->handler_idx = VKD3DSIH_NOP; -+ *dst = *a; -+ return; -+ } -+ - if ((!is_int && !is_fp) || is_int != (code >= ICMP_EQ)) - { - FIXME("Invalid operation %"PRIu64" on type class %u.\n", code, type_a->class); -@@ -5274,34 +6770,116 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor - - vsir_instruction_init(ins, &sm6->p.location, cmp->handler_idx); - -- flags = (record->operand_count > i) ? record->operands[i] : 0; -- silence_warning = false; -+ flags = (record->operand_count > i) ? record->operands[i] : 0; -+ silence_warning = false; -+ -+ if (is_fp) -+ { -+ if (!(flags & FP_ALLOW_UNSAFE_ALGEBRA)) -+ ins->flags |= VKD3DSI_PRECISE_X; -+ flags &= ~FP_ALLOW_UNSAFE_ALGEBRA; -+ /* SPIR-V FPFastMathMode is only available in the Kernel execution model. */ -+ silence_warning = !(flags & ~(FP_NO_NAN | FP_NO_INF | FP_NO_SIGNED_ZEROS | FP_ALLOW_RECIPROCAL)); -+ } -+ if (flags && silence_warning) -+ { -+ TRACE("Ignoring fast FP modifier %#"PRIx64".\n", flags); -+ } -+ else if (flags) -+ { -+ WARN("Ignoring flags %#"PRIx64".\n", flags); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring flags %#"PRIx64" for a comparison operation.", flags); -+ } -+ -+ if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) -+ return; -+ src_param_init_from_value(&src_params[0 ^ cmp->src_swap], a); -+ src_param_init_from_value(&src_params[1 ^ cmp->src_swap], b); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_record *record, -+ struct vkd3d_shader_instruction *ins, struct sm6_value *dst) -+{ -+ uint64_t success_ordering, failure_ordering; -+ struct vkd3d_shader_dst_param *dst_params; -+ struct vkd3d_shader_src_param *src_params; -+ const struct sm6_value *ptr, *cmp, *new; -+ const struct sm6_type *type; -+ unsigned int i = 0; -+ bool is_volatile; -+ uint64_t code; -+ -+ if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) -+ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6) -+ || !sm6_value_validate_is_backward_ref(ptr, sm6)) -+ return; - -- if (is_fp) -+ if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) - { -- if (!(flags & FP_ALLOW_UNSAFE_ALGEBRA)) -- ins->flags |= VKD3DSI_PRECISE_X; -- flags &= ~FP_ALLOW_UNSAFE_ALGEBRA; -- /* SPIR-V FPFastMathMode is only available in the Kernel execution model. 
*/ -- silence_warning = !(flags & ~(FP_NO_NAN | FP_NO_INF | FP_NO_SIGNED_ZEROS | FP_ALLOW_RECIPROCAL)); -+ WARN("Register is not groupshared.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "The destination register for a cmpxchg instruction is not groupshared memory."); -+ return; - } -- if (flags && silence_warning) -+ -+ if (!(dst->type = sm6_type_get_cmpxchg_result_struct(sm6))) - { -- TRACE("Ignoring fast FP modifier %#"PRIx64".\n", flags); -+ WARN("Failed to find result struct.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "Module does not define a result struct type for a cmpxchg instruction."); -+ return; - } -- else if (flags) -+ -+ type = ptr->type->u.pointer.type; -+ cmp = sm6_parser_get_value_by_ref(sm6, record, type, &i); -+ new = sm6_parser_get_value_by_ref(sm6, record, type, &i); -+ if (!cmp || !new) -+ return; -+ -+ if (!sm6_value_validate_is_i32(cmp, sm6) -+ || !sm6_value_validate_is_i32(new, sm6) -+ || !dxil_record_validate_operand_count(record, i + 3, i + 5, sm6)) - { -- WARN("Ignoring flags %#"PRIx64".\n", flags); -- vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -- "Ignoring flags %#"PRIx64" for a comparison operation.", flags); -+ return; - } - -- if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) -+ is_volatile = record->operands[i++]; -+ success_ordering = record->operands[i++]; -+ -+ if ((code = record->operands[i++]) != 1) -+ FIXME("Ignoring synchronisation scope %"PRIu64".\n", code); -+ -+ failure_ordering = (record->operand_count > i) ? record->operands[i++] : success_ordering; -+ -+ /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */ -+ if (success_ordering != ORDERING_SEQCST) -+ FIXME("Unhandled success ordering %"PRIu64".\n", success_ordering); -+ if (success_ordering != failure_ordering) -+ FIXME("Unhandled failure ordering %"PRIu64".\n", failure_ordering); -+ -+ if (record->operand_count > i && record->operands[i]) -+ FIXME("Ignoring weak cmpxchg.\n"); -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_IMM_ATOMIC_CMP_EXCH); -+ ins->flags = is_volatile ? 
VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; -+ -+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) - return; -- src_param_init_from_value(&src_params[0 ^ cmp->src_swap], a); -- src_param_init_from_value(&src_params[1 ^ cmp->src_swap], b); -+ src_param_make_constant_uint(&src_params[0], 0); -+ src_param_init_from_value(&src_params[1], cmp); -+ src_param_init_from_value(&src_params[2], new); - -- instruction_dst_param_init_ssa_scalar(ins, sm6); -+ if (!(dst_params = instruction_dst_params_alloc(ins, 2, sm6))) -+ return; -+ register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); -+ dst_param_init(&dst_params[0]); -+ dst_params[1].reg = ptr->u.reg; -+ dst_param_init(&dst_params[1]); -+ -+ dst->u.reg = dst_params[0].reg; - } - - static void sm6_parser_emit_extractval(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -5459,6 +7037,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record - register_index_address_init(®->idx[1], elem_value, sm6); - reg->idx[1].is_in_bounds = is_in_bounds; - reg->idx_count = 2; -+ dst->structure_stride = src->structure_stride; - - ins->handler_idx = VKD3DSIH_NOP; - } -@@ -5467,8 +7046,8 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor - struct vkd3d_shader_instruction *ins, struct sm6_value *dst) - { - const struct sm6_type *elem_type = NULL, *pointee_type; -- struct vkd3d_shader_src_param *src_param; -- unsigned int alignment, i = 0; -+ unsigned int alignment, operand_count, i = 0; -+ struct vkd3d_shader_src_param *src_params; - const struct sm6_value *ptr; - uint64_t alignment_code; - -@@ -5476,6 +7055,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor - return; - if (!sm6_value_validate_is_register(ptr, sm6) - || !sm6_value_validate_is_pointer(ptr, sm6) -+ || !sm6_value_validate_is_backward_ref(ptr, sm6) - || !dxil_record_validate_operand_count(record, i + 2, i + 3, sm6)) - return; - -@@ -5505,12 +7085,34 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor - if (record->operands[i]) - WARN("Ignoring volatile modifier.\n"); - -- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (ptr->structure_stride) -+ { -+ assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); - -- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -- return; -- src_param_init_from_value(&src_param[0], ptr); -- src_param->reg.alignment = alignment; -+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -+ return; -+ if (ptr->u.reg.idx[1].rel_addr) -+ src_params[0] = *ptr->u.reg.idx[1].rel_addr; -+ else -+ src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); -+ /* Struct offset is always zero as there is no struct, just an array. */ -+ src_param_make_constant_uint(&src_params[1], 0); -+ src_param_init_from_value(&src_params[2], ptr); -+ src_params[2].reg.alignment = alignment; -+ } -+ else -+ { -+ operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? 
VKD3DSIH_LD_RAW : VKD3DSIH_MOV); -+ -+ if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) -+ return; -+ if (operand_count > 1) -+ src_param_make_constant_uint(&src_params[0], 0); -+ src_param_init_from_value(&src_params[operand_count - 1], ptr); -+ src_params[operand_count - 1].reg.alignment = alignment; -+ } - - instruction_dst_param_init_ssa_scalar(ins, sm6); - } -@@ -5628,16 +7230,17 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record - static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, - struct vkd3d_shader_instruction *ins, struct sm6_value *dst) - { -- struct vkd3d_shader_src_param *src_param; -+ unsigned int i = 0, alignment, operand_count; -+ struct vkd3d_shader_src_param *src_params; - struct vkd3d_shader_dst_param *dst_param; - const struct sm6_type *pointee_type; - const struct sm6_value *ptr, *src; -- unsigned int i = 0, alignment; - uint64_t alignment_code; - - if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) - || !sm6_value_validate_is_register(ptr, sm6) -- || !sm6_value_validate_is_pointer(ptr, sm6)) -+ || !sm6_value_validate_is_pointer(ptr, sm6) -+ || !sm6_value_validate_is_backward_ref(ptr, sm6)) - { - return; - } -@@ -5665,16 +7268,40 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco - if (record->operands[i]) - WARN("Ignoring volatile modifier.\n"); - -- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ if (ptr->structure_stride) -+ { -+ assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED); - -- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -- return; -- src_param_init_from_value(&src_param[0], src); -+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -+ return; -+ if (ptr->u.reg.idx[1].rel_addr) -+ src_params[0] = *ptr->u.reg.idx[1].rel_addr; -+ else -+ src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); -+ /* Struct offset is always zero as there is no struct, just an array. */ -+ src_param_make_constant_uint(&src_params[1], 0); -+ src_param_init_from_value(&src_params[2], src); -+ } -+ else -+ { -+ operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV); -+ -+ if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) -+ return; -+ if (operand_count > 1) -+ src_param_make_constant_uint(&src_params[0], 0); -+ src_param_init_from_value(&src_params[operand_count - 1], src); -+ } - - dst_param = instruction_dst_params_alloc(ins, 1, sm6); - dst_param_init(dst_param); - dst_param->reg = ptr->u.reg; - dst_param->reg.alignment = alignment; -+ /* Groupshared stores contain the address in the src params. 
*/ -+ if (dst_param->reg.type != VKD3DSPR_IDXTEMP) -+ dst_param->reg.idx_count = 1; - } - - static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -5855,6 +7482,25 @@ static bool sm6_metadata_get_uint64_value(const struct sm6_parser *sm6, - return true; - } - -+static bool sm6_metadata_get_float_value(const struct sm6_parser *sm6, -+ const struct sm6_metadata_value *m, float *f) -+{ -+ const struct sm6_value *value; -+ -+ if (!m || m->type != VKD3D_METADATA_VALUE) -+ return false; -+ -+ value = m->u.value; -+ if (!sm6_value_is_constant(value)) -+ return false; -+ if (!sm6_type_is_floating_point(value->type)) -+ return false; -+ -+ *f = register_get_float_value(&value->u.reg); -+ -+ return true; -+} -+ - static void sm6_parser_metadata_attachment_block_init(struct sm6_parser *sm6, const struct dxil_block *target_block, - const struct dxil_block *block) - { -@@ -6124,6 +7770,7 @@ static enum vkd3d_result sm6_function_resolve_phi_incomings(const struct sm6_fun - static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct dxil_block *block, - struct sm6_function *function) - { -+ struct vsir_program *program = sm6->p.program; - struct vkd3d_shader_instruction *ins; - size_t i, block_idx, block_count; - const struct dxil_record *record; -@@ -6132,11 +7779,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - struct sm6_block *code_block; - struct sm6_value *dst; - -- if (sm6->function_count) -- { -- FIXME("Multiple functions are not supported yet.\n"); -- return VKD3D_ERROR_INVALID_SHADER; -- } - if (!(function->declaration = sm6_parser_next_function_definition(sm6))) - { - WARN("Failed to find definition to match function body.\n"); -@@ -6207,6 +7849,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - fwd_type = dst->type; - dst->type = NULL; - dst->value_type = VALUE_TYPE_REG; -+ dst->is_back_ref = true; - is_terminator = false; - - record = block->records[i]; -@@ -6215,6 +7858,13 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - case FUNC_CODE_INST_ALLOCA: - sm6_parser_emit_alloca(sm6, record, ins, dst); - break; -+ case FUNC_CODE_INST_ATOMICRMW: -+ { -+ struct function_emission_state state = {code_block, ins}; -+ sm6_parser_emit_atomicrmw(sm6, record, &state, dst); -+ program->temp_count = max(program->temp_count, state.temp_idx); -+ break; -+ } - case FUNC_CODE_INST_BINOP: - sm6_parser_emit_binop(sm6, record, ins, dst); - break; -@@ -6226,7 +7876,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - { - struct function_emission_state state = {code_block, ins}; - sm6_parser_emit_call(sm6, record, &state, dst); -- sm6->p.program.temp_count = max(sm6->p.program.temp_count, state.temp_idx); -+ program->temp_count = max(program->temp_count, state.temp_idx); - break; - } - case FUNC_CODE_INST_CAST: -@@ -6235,6 +7885,9 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - case FUNC_CODE_INST_CMP2: - sm6_parser_emit_cmp2(sm6, record, ins, dst); - break; -+ case FUNC_CODE_INST_CMPXCHG: -+ sm6_parser_emit_cmpxchg(sm6, record, ins, dst); -+ break; - case FUNC_CODE_INST_EXTRACTVAL: - sm6_parser_emit_extractval(sm6, record, ins, dst); - break; -@@ -6497,9 +8150,10 @@ static void sm6_parser_emit_label(struct sm6_parser *sm6, unsigned int label_id) - - static enum vkd3d_result sm6_function_emit_blocks(const struct sm6_function *function, struct sm6_parser *sm6) - { -+ 
struct vsir_program *program = sm6->p.program; - unsigned int i; - -- sm6->p.program.block_count = function->block_count; -+ program->block_count = function->block_count; - - for (i = 0; i < function->block_count; ++i) - { -@@ -6515,9 +8169,9 @@ static enum vkd3d_result sm6_function_emit_blocks(const struct sm6_function *fun - sm6_parser_emit_label(sm6, block->id); - sm6_block_emit_phi(block, sm6); - -- memcpy(&sm6->p.program.instructions.elements[sm6->p.program.instructions.count], block->instructions, -+ memcpy(&program->instructions.elements[program->instructions.count], block->instructions, - block->instruction_count * sizeof(*block->instructions)); -- sm6->p.program.instructions.count += block->instruction_count; -+ program->instructions.count += block->instruction_count; - - sm6_block_emit_terminator(block, sm6); - } -@@ -6800,14 +8454,53 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = - { - [SEMANTIC_KIND_ARBITRARY] = VKD3D_SHADER_SV_NONE, - [SEMANTIC_KIND_VERTEXID] = VKD3D_SHADER_SV_VERTEX_ID, -+ [SEMANTIC_KIND_INSTANCEID] = VKD3D_SHADER_SV_INSTANCE_ID, - [SEMANTIC_KIND_POSITION] = VKD3D_SHADER_SV_POSITION, -+ [SEMANTIC_KIND_CLIPDISTANCE] = VKD3D_SHADER_SV_CLIP_DISTANCE, -+ [SEMANTIC_KIND_CULLDISTANCE] = VKD3D_SHADER_SV_CULL_DISTANCE, -+ [SEMANTIC_KIND_PRIMITIVEID] = VKD3D_SHADER_SV_PRIMITIVE_ID, - [SEMANTIC_KIND_ISFRONTFACE] = VKD3D_SHADER_SV_IS_FRONT_FACE, -+ [SEMANTIC_KIND_COVERAGE] = VKD3D_SHADER_SV_COVERAGE, - [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, -+ [SEMANTIC_KIND_DEPTH] = VKD3D_SHADER_SV_DEPTH, -+ [SEMANTIC_KIND_DEPTHLESSEQUAL] = VKD3D_SHADER_SV_DEPTH_LESS_EQUAL, -+ [SEMANTIC_KIND_DEPTHGREATEREQUAL] = VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL, - }; - --static enum vkd3d_shader_sysval_semantic sysval_semantic_from_dxil_semantic_kind(enum dxil_semantic_kind kind) -+static enum vkd3d_shader_sysval_semantic sysval_semantic_from_dxil_semantic_kind(enum dxil_semantic_kind kind, -+ enum vkd3d_tessellator_domain domain) - { -- if (kind < ARRAY_SIZE(sysval_semantic_table)) -+ if (kind == SEMANTIC_KIND_TESSFACTOR) -+ { -+ switch (domain) -+ { -+ case VKD3D_TESSELLATOR_DOMAIN_LINE: -+ return VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; -+ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -+ return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; -+ case VKD3D_TESSELLATOR_DOMAIN_QUAD: -+ return VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE; -+ default: -+ /* Error is handled during parsing. */ -+ return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; -+ } -+ } -+ else if (kind == SEMANTIC_KIND_INSIDETESSFACTOR) -+ { -+ switch (domain) -+ { -+ case VKD3D_TESSELLATOR_DOMAIN_LINE: -+ return VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; -+ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -+ return VKD3D_SHADER_SV_TESS_FACTOR_TRIINT; -+ case VKD3D_TESSELLATOR_DOMAIN_QUAD: -+ return VKD3D_SHADER_SV_TESS_FACTOR_QUADINT; -+ default: -+ /* Error is handled during parsing. 
*/ -+ return VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; -+ } -+ } -+ else if (kind < ARRAY_SIZE(sysval_semantic_table)) - { - return sysval_semantic_table[kind]; - } -@@ -7448,7 +9141,7 @@ static enum vkd3d_result sm6_parser_descriptor_type_init(struct sm6_parser *sm6, - } - - ++sm6->descriptor_count; -- ++sm6->p.program.instructions.count; -+ ++sm6->p.program->instructions.count; - } - - return VKD3D_OK; -@@ -7563,12 +9256,13 @@ static void signature_element_read_additional_element_values(struct signature_el - } - - static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, -- struct shader_signature *s) -+ struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain) - { - unsigned int i, j, column_count, operand_count, index; - const struct sm6_metadata_node *node, *element_node; - struct signature_element *elements, *e; - unsigned int values[10]; -+ bool is_register; - - if (!m) - return VKD3D_OK; -@@ -7656,7 +9350,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - e->min_precision = minimum_precision_from_dxil_component_type(values[2]); - - j = values[3]; -- e->sysval_semantic = sysval_semantic_from_dxil_semantic_kind(j); -+ e->sysval_semantic = sysval_semantic_from_dxil_semantic_kind(j, tessellator_domain); - if (j != SEMANTIC_KIND_ARBITRARY && j != SEMANTIC_KIND_TARGET && e->sysval_semantic == VKD3D_SHADER_SV_NONE) - { - WARN("Unhandled semantic kind %u.\n", j); -@@ -7677,7 +9371,18 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - column_count = values[7]; - e->register_index = values[8]; - e->target_location = e->register_index; -- if (e->register_index > MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index) -+ -+ if ((is_register = e->register_index == UINT_MAX)) -+ { -+ if (register_type_from_dxil_semantic_kind(e->sysval_semantic) == VKD3DSPR_INVALID) -+ { -+ WARN("Unhandled I/O register semantic kind %u.\n", j); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, -+ "DXIL semantic kind %u is unhandled for an I/O register.", j); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ } -+ else if (e->register_index > MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index) - { - WARN("Invalid row start %u with row count %u.\n", e->register_index, e->register_count); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, -@@ -7685,8 +9390,9 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - e->register_index, e->register_count); - return VKD3D_ERROR_INVALID_SHADER; - } -+ - index = values[9]; -- if (index >= VKD3D_VEC4_SIZE || column_count > VKD3D_VEC4_SIZE - index) -+ if (index != UINT8_MAX && (index >= VKD3D_VEC4_SIZE || column_count > VKD3D_VEC4_SIZE - index)) - { - WARN("Invalid column start %u with count %u.\n", index, column_count); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, -@@ -7696,10 +9402,17 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - - e->mask = vkd3d_write_mask_from_component_count(column_count); - e->used_mask = e->mask; -- e->mask <<= index; -- - signature_element_read_additional_element_values(e, element_node, sm6); -- e->used_mask <<= index; -+ -+ if (index != UINT8_MAX) -+ { -+ e->mask <<= index; -+ e->used_mask <<= index; -+ } -+ -+ /* DXIL reads/writes uint for bool I/O. 
*/ -+ if (e->component_type == VKD3D_SHADER_COMPONENT_BOOL) -+ e->component_type = VKD3D_SHADER_COMPONENT_UINT; - - m = element_node->operands[4]; - if (!sm6_metadata_value_is_node(m)) -@@ -7732,128 +9445,476 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - } - } - -- vkd3d_free(s->elements); -- s->elements = elements; -- s->element_count = operand_count; -+ vkd3d_free(s->elements); -+ s->elements = elements; -+ s->element_count = operand_count; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m, -+ enum vkd3d_tessellator_domain tessellator_domain) -+{ -+ struct vsir_program *program = sm6->p.program; -+ enum vkd3d_result ret; -+ -+ if (!sm6_metadata_value_is_node(m)) -+ { -+ WARN("Signature table is not a node.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, -+ "Signature table is not a metadata node."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], -+ &program->input_signature, tessellator_domain)) < 0) -+ { -+ return ret; -+ } -+ if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], -+ &program->output_signature, tessellator_domain)) < 0) -+ { -+ return ret; -+ } -+ if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], -+ &program->patch_constant_signature, tessellator_domain)) < 0) -+ { -+ return ret; -+ } -+ -+ sm6_parser_init_input_signature(sm6, &program->input_signature); -+ sm6_parser_init_output_signature(sm6, &program->output_signature); -+ sm6_parser_init_patch_constant_signature(sm6, &program->patch_constant_signature); -+ -+ return VKD3D_OK; -+} -+ -+static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm6_metadata_value *m) -+{ -+ enum vkd3d_shader_global_flags global_flags, mask, rotated_flags; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!sm6_metadata_get_uint64_value(sm6, m, (uint64_t*)&global_flags)) -+ { -+ WARN("Failed to load global flags.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Global flags metadata value is not an integer."); -+ return; -+ } -+ /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vkd3d_shader_global_flags. 
*/ -+ mask = (VKD3DSGF_SKIP_OPTIMIZATION << 1) - 1; -+ rotated_flags = global_flags & mask; -+ rotated_flags = (rotated_flags >> 1) | ((rotated_flags & 1) << 4); -+ global_flags = (global_flags & ~mask) | rotated_flags; -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_GLOBAL_FLAGS); -+ ins->declaration.global_flags = global_flags; -+} -+ -+static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, const struct sm6_metadata_value *m) -+{ -+ struct vkd3d_shader_version *version = &sm6->p.program->shader_version; -+ const struct sm6_metadata_node *node; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int group_sizes[3]; -+ unsigned int i; -+ -+ if (version->type != VKD3D_SHADER_TYPE_COMPUTE) -+ { -+ WARN("Shader of type %#x has thread group dimensions.\n", version->type); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Shader has thread group dimensions but is not a compute shader."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!m || !sm6_metadata_value_is_node(m)) -+ { -+ WARN("Thread group dimension value is not a node.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Thread group dimension metadata value is not a node."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ node = m->u.node; -+ if (node->operand_count != 3) -+ { -+ WARN("Invalid operand count %u.\n", node->operand_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "Thread group dimension operand count %u is invalid.", node->operand_count); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ for (i = 0; i < 3; ++i) -+ { -+ if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &group_sizes[i])) -+ { -+ WARN("Thread group dimension is not an integer value.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Thread group dimension metadata value is not an integer."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!group_sizes[i] || group_sizes[i] > dx_max_thread_group_size[i]) -+ { -+ char dim = "XYZ"[i]; -+ WARN("Invalid thread group %c dimension %u.\n", dim, group_sizes[i]); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Thread group %c dimension %u is invalid.", dim, group_sizes[i]); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ } -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_THREAD_GROUP); -+ ins->declaration.thread_group_size.x = group_sizes[0]; -+ ins->declaration.thread_group_size.y = group_sizes[1]; -+ ins->declaration.thread_group_size.z = group_sizes[2]; -+ -+ return VKD3D_OK; -+} -+ -+static void sm6_parser_emit_dcl_count(struct sm6_parser *sm6, enum vkd3d_shader_opcode handler_idx, unsigned int count) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ ins = sm6_parser_add_instruction(sm6, handler_idx); -+ ins->declaration.count = count; -+} -+ -+static void sm6_parser_emit_dcl_primitive_topology(struct sm6_parser *sm6, -+ enum vkd3d_shader_opcode handler_idx, enum vkd3d_primitive_type primitive_type, -+ unsigned int patch_vertex_count) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ ins = sm6_parser_add_instruction(sm6, handler_idx); -+ ins->declaration.primitive_type.type = primitive_type; -+ ins->declaration.primitive_type.patch_vertex_count = patch_vertex_count; -+} -+ -+static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, -+ enum vkd3d_tessellator_domain tessellator_domain) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ if 
(tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID || tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) -+ { -+ WARN("Unhandled domain %u.\n", tessellator_domain); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Domain shader tessellator domain %u is unhandled.", tessellator_domain); -+ } -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); -+ ins->declaration.tessellator_domain = tessellator_domain; -+} -+ -+static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, -+ const char *type) -+{ -+ if (!count || count > 32) -+ { -+ WARN("%s control point count %u invalid.\n", type, count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "%s control point count %u is invalid.", type, count); -+ } -+} -+ -+static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, -+ enum vkd3d_shader_tessellator_partitioning tessellator_partitioning) -+{ -+ struct vkd3d_shader_instruction *ins; - -- return VKD3D_OK; -+ if (!tessellator_partitioning || tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) -+ { -+ WARN("Unhandled partitioning %u.\n", tessellator_partitioning); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader tessellator partitioning %u is unhandled.", tessellator_partitioning); -+ } -+ -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING); -+ ins->declaration.tessellator_partitioning = tessellator_partitioning; - } - --static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m) -+static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *sm6, -+ enum vkd3d_shader_tessellator_output_primitive primitive) - { -- enum vkd3d_result ret; -+ struct vkd3d_shader_instruction *ins; - -- if (!sm6_metadata_value_is_node(m)) -+ if (!primitive || primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) - { -- WARN("Signature table is not a node.\n"); -- vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, -- "Signature table is not a metadata node."); -- return VKD3D_ERROR_INVALID_SHADER; -+ WARN("Unhandled output primitive %u.\n", primitive); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader tessellator output primitive %u is unhandled.", primitive); - } - -- if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], -- &sm6->p.shader_desc.input_signature)) < 0) -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE); -+ ins->declaration.tessellator_output_primitive = primitive; -+} -+ -+static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, struct sm6_metadata_value *m) -+{ -+ struct vkd3d_shader_instruction *ins; -+ float max_tessellation_factor; -+ -+ if (!sm6_metadata_get_float_value(sm6, m, &max_tessellation_factor)) - { -- return ret; -+ WARN("Max tess factor property is not a float value.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader max tessellation factor property operand is not a float."); -+ return; - } -- if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], -- &sm6->p.shader_desc.output_signature)) < 0) -+ -+ /* Exclude non-finite values. 
*/ -+ if (!(max_tessellation_factor >= 1.0f && max_tessellation_factor <= 64.0f)) - { -- return ret; -+ WARN("Invalid max tess factor %f.\n", max_tessellation_factor); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader max tessellation factor %f is invalid.", max_tessellation_factor); - } -- /* TODO: patch constant signature in operand 2. */ -- -- sm6_parser_init_input_signature(sm6, &sm6->p.shader_desc.input_signature); -- sm6_parser_init_output_signature(sm6, &sm6->p.shader_desc.output_signature); - -- return VKD3D_OK; -+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_HS_MAX_TESSFACTOR); -+ ins->declaration.max_tessellation_factor = max_tessellation_factor; - } - --static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm6_metadata_value *m) -+static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m) - { -- enum vkd3d_shader_global_flags global_flags, mask, rotated_flags; -- struct vkd3d_shader_instruction *ins; -+ enum vkd3d_primitive_type input_primitive = VKD3D_PT_TRIANGLELIST, output_primitive; -+ unsigned int i, input_control_point_count = 1, patch_vertex_count = 0; -+ const struct sm6_metadata_node *node; -+ unsigned int operands[5] = {0}; - -- if (!sm6_metadata_get_uint64_value(sm6, m, (uint64_t*)&global_flags)) -+ if (!m || !sm6_metadata_value_is_node(m)) - { -- WARN("Failed to load global flags.\n"); -+ WARN("Missing or invalid GS properties.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -- "Global flags metadata value is not an integer."); -+ "Geometry shader properties node is missing or invalid."); - return; - } -- /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vkd3d_shader_global_flags. 
*/ -- mask = (VKD3DSGF_SKIP_OPTIMIZATION << 1) - 1; -- rotated_flags = global_flags & mask; -- rotated_flags = (rotated_flags >> 1) | ((rotated_flags & 1) << 4); -- global_flags = (global_flags & ~mask) | rotated_flags; - -- ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_GLOBAL_FLAGS); -- ins->declaration.global_flags = global_flags; -+ node = m->u.node; -+ if (node->operand_count < ARRAY_SIZE(operands)) -+ { -+ WARN("Invalid operand count %u.\n", node->operand_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "Geometry shader properties operand count %u is invalid.", node->operand_count); -+ return; -+ } -+ if (node->operand_count > ARRAY_SIZE(operands)) -+ { -+ WARN("Ignoring %zu extra operands.\n", node->operand_count - ARRAY_SIZE(operands)); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %zu extra operands for geometry shader properties.", -+ node->operand_count - ARRAY_SIZE(operands)); -+ } -+ -+ for (i = 0; i < node->operand_count; ++i) -+ { -+ if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) -+ { -+ WARN("GS property at index %u is not a uint value.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader properties operand at index %u is not an integer.", i); -+ } -+ } -+ -+ switch (i = operands[0]) -+ { -+ case INPUT_PRIMITIVE_POINT: -+ input_primitive = VKD3D_PT_POINTLIST; -+ input_control_point_count = 1; -+ break; -+ -+ case INPUT_PRIMITIVE_LINE: -+ input_primitive = VKD3D_PT_LINELIST; -+ input_control_point_count = 2; -+ break; -+ -+ case INPUT_PRIMITIVE_TRIANGLE: -+ input_primitive = VKD3D_PT_TRIANGLELIST; -+ input_control_point_count = 3; -+ break; -+ -+ case INPUT_PRIMITIVE_LINEWITHADJACENCY: -+ input_primitive = VKD3D_PT_LINELIST_ADJ; -+ input_control_point_count = 4; -+ break; -+ -+ case INPUT_PRIMITIVE_TRIANGLEWITHADJACENY: -+ input_primitive = VKD3D_PT_TRIANGLELIST_ADJ; -+ input_control_point_count = 6; -+ break; -+ -+ default: -+ if (i >= INPUT_PRIMITIVE_PATCH1 && i <= INPUT_PRIMITIVE_PATCH32) -+ { -+ input_primitive = VKD3D_PT_PATCH; -+ patch_vertex_count = i - INPUT_PRIMITIVE_PATCH1 + 1; -+ break; -+ } -+ -+ WARN("Unhandled input primitive %u.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader input primitive %u is unhandled.", i); -+ break; -+ } -+ -+ sm6_parser_emit_dcl_primitive_topology(sm6, VKD3DSIH_DCL_INPUT_PRIMITIVE, input_primitive, patch_vertex_count); -+ sm6->p.program->input_control_point_count = input_control_point_count; -+ -+ i = operands[1]; -+ /* Max total scalar count sets an upper limit. We would need to scan outputs to be more precise. 
*/ -+ if (i > MAX_GS_OUTPUT_TOTAL_SCALARS) -+ { -+ WARN("GS output vertex count %u invalid.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader output vertex count %u is invalid.", i); -+ } -+ sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_VERTICES_OUT, i); -+ -+ if (operands[2] > 1) -+ { -+ FIXME("Unhandled stream mask %#x.\n", operands[2]); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader stream mask %#x is unhandled.", operands[2]); -+ } -+ -+ output_primitive = operands[3]; -+ if (output_primitive == VKD3D_PT_UNDEFINED || output_primitive >= VKD3D_PT_COUNT) -+ { -+ WARN("Unhandled output primitive %u.\n", output_primitive); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader output primitive %u is unhandled.", output_primitive); -+ output_primitive = VKD3D_PT_TRIANGLELIST; -+ } -+ sm6_parser_emit_dcl_primitive_topology(sm6, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, output_primitive, 0); -+ -+ i = operands[4]; -+ if (!i || i > MAX_GS_INSTANCE_COUNT) -+ { -+ WARN("GS instance count %u invalid.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader instance count %u is invalid.", i); -+ } -+ sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_GS_INSTANCES, i); - } - --static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, const struct sm6_metadata_value *m) -+static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_parser *sm6, -+ const struct sm6_metadata_value *m) - { - const struct sm6_metadata_node *node; -- struct vkd3d_shader_instruction *ins; -- unsigned int group_sizes[3]; -+ unsigned int operands[2] = {0}; - unsigned int i; - -- if (sm6->p.program.shader_version.type != VKD3D_SHADER_TYPE_COMPUTE) -+ if (!m || !sm6_metadata_value_is_node(m)) - { -- WARN("Shader of type %#x has thread group dimensions.\n", sm6->p.program.shader_version.type); -+ WARN("Missing or invalid DS properties.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -- "Shader has thread group dimensions but is not a compute shader."); -- return VKD3D_ERROR_INVALID_SHADER; -+ "Domain shader properties node is missing or invalid."); -+ return 0; -+ } -+ -+ node = m->u.node; -+ if (node->operand_count < ARRAY_SIZE(operands)) -+ { -+ WARN("Invalid operand count %u.\n", node->operand_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "Domain shader properties operand count %u is invalid.", node->operand_count); -+ return 0; -+ } -+ if (node->operand_count > ARRAY_SIZE(operands)) -+ { -+ WARN("Ignoring %zu extra operands.\n", node->operand_count - ARRAY_SIZE(operands)); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %zu extra operands for domain shader properties.", -+ node->operand_count - ARRAY_SIZE(operands)); -+ } -+ -+ for (i = 0; i < node->operand_count; ++i) -+ { -+ if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) -+ { -+ WARN("DS property at index %u is not a uint value.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Domain shader properties operand at index %u is not an integer.", i); -+ } - } - -+ sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); -+ sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); -+ 
sm6->p.program->input_control_point_count = operands[1]; -+ -+ return operands[0]; -+} -+ -+static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_parser *sm6, -+ const struct sm6_metadata_value *m) -+{ -+ struct vsir_program *program = sm6->p.program; -+ const struct sm6_metadata_node *node; -+ unsigned int operands[6] = {0}; -+ unsigned int i; -+ - if (!m || !sm6_metadata_value_is_node(m)) - { -- WARN("Thread group dimension value is not a node.\n"); -+ WARN("Missing or invalid HS properties.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -- "Thread group dimension metadata value is not a node."); -- return VKD3D_ERROR_INVALID_SHADER; -+ "Hull shader properties node is missing or invalid."); -+ return 0; - } - - node = m->u.node; -- if (node->operand_count != 3) -+ if (node->operand_count < 7) - { - WARN("Invalid operand count %u.\n", node->operand_count); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -- "Thread group dimension operand count %u is invalid.", node->operand_count); -- return VKD3D_ERROR_INVALID_SHADER; -+ "Hull shader properties operand count %u is invalid.", node->operand_count); -+ return 0; -+ } -+ if (node->operand_count > 7) -+ { -+ WARN("Ignoring %u extra operands.\n", node->operand_count - 7); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %u extra operands for hull shader properties.", node->operand_count - 7); - } - -- for (i = 0; i < 3; ++i) -+ m = node->operands[0]; -+ if (!sm6_metadata_value_is_value(m) || !sm6_value_is_function_dcl(m->u.value)) - { -- if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &group_sizes[i])) -- { -- WARN("Thread group dimension is not an integer value.\n"); -- vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -- "Thread group dimension metadata value is not an integer."); -- return VKD3D_ERROR_INVALID_SHADER; -- } -- if (!group_sizes[i] || group_sizes[i] > dx_max_thread_group_size[i]) -+ WARN("Patch constant function node is not a function value.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Hull shader patch constant function node is not a function value."); -+ } -+ else -+ { -+ sm6->patch_constant_function = m->u.value->u.function.name; -+ } -+ -+ for (i = 1; i < min(node->operand_count, ARRAY_SIZE(operands)); ++i) -+ { -+ if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) - { -- char dim = "XYZ"[i]; -- WARN("Invalid thread group %c dimension %u.\n", dim, group_sizes[i]); -+ WARN("HS property at index %u is not a uint value.\n", i); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -- "Thread group %c dimension %u is invalid.", dim, group_sizes[i]); -- return VKD3D_ERROR_INVALID_SHADER; -+ "Hull shader properties operand at index %u is not an integer.", i); - } - } - -- ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_THREAD_GROUP); -- ins->declaration.thread_group_size.x = group_sizes[0]; -- ins->declaration.thread_group_size.y = group_sizes[1]; -- ins->declaration.thread_group_size.z = group_sizes[2]; -+ sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); -+ program->input_control_point_count = operands[1]; -+ sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); -+ sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); -+ program->output_control_point_count = 
operands[2]; -+ sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); -+ sm6_parser_emit_dcl_tessellator_partitioning(sm6, operands[4]); -+ sm6_parser_emit_dcl_tessellator_output_primitive(sm6, operands[5]); -+ sm6_parser_emit_dcl_max_tessellation_factor(sm6, node->operands[6]); - -- return VKD3D_OK; -+ return operands[3]; - } - - static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) - { - const struct sm6_metadata_value *m = sm6_parser_find_named_metadata(sm6, "dx.entryPoints"); - const struct sm6_metadata_node *node, *entry_node = m ? m->u.node : NULL; -+ enum vkd3d_tessellator_domain tessellator_domain = 0; - unsigned int i, operand_count, tag; - const struct sm6_value *value; - enum vkd3d_result ret; -@@ -7892,12 +9953,6 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) - "Entry point function name %s does not match the name in metadata.", sm6->entry_point); - } - -- if (entry_node->operand_count >= 3 && (m = entry_node->operands[2]) -- && (ret = sm6_parser_signatures_init(sm6, m)) < 0) -- { -- return ret; -- } -- - if (entry_node->operand_count >= 5 && (m = entry_node->operands[4])) - { - if (!sm6_metadata_value_is_node(m)) -@@ -7932,6 +9987,15 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) - case SHADER_PROPERTIES_FLAGS: - sm6_parser_emit_global_flags(sm6, node->operands[i + 1]); - break; -+ case SHADER_PROPERTIES_GEOMETRY: -+ sm6_parser_gs_properties_init(sm6, node->operands[i + 1]); -+ break; -+ case SHADER_PROPERTIES_DOMAIN: -+ tessellator_domain = sm6_parser_ds_properties_init(sm6, node->operands[i + 1]); -+ break; -+ case SHADER_PROPERTIES_HULL: -+ tessellator_domain = sm6_parser_hs_properties_init(sm6, node->operands[i + 1]); -+ break; - case SHADER_PROPERTIES_COMPUTE: - if ((ret = sm6_parser_emit_thread_group(sm6, node->operands[i + 1])) < 0) - return ret; -@@ -7945,6 +10009,12 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) - } - } - -+ if (entry_node->operand_count >= 3 && (m = entry_node->operands[2]) -+ && (ret = sm6_parser_signatures_init(sm6, m, tessellator_domain)) < 0) -+ { -+ return ret; -+ } -+ - return VKD3D_OK; - } - -@@ -8049,28 +10119,18 @@ static void sm6_functions_cleanup(struct sm6_function *functions, size_t count) - vkd3d_free(functions); - } - --static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) -+static void sm6_parser_cleanup(struct sm6_parser *sm6) - { -- struct sm6_parser *sm6 = sm6_parser(parser); -- - dxil_block_destroy(&sm6->root_block); - dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); -- vsir_program_cleanup(&parser->program); - sm6_type_table_cleanup(sm6->types, sm6->type_count); - sm6_symtab_cleanup(sm6->global_symbols, sm6->global_symbol_count); - sm6_functions_cleanup(sm6->functions, sm6->function_count); - sm6_parser_metadata_cleanup(sm6); - vkd3d_free(sm6->descriptors); - vkd3d_free(sm6->values); -- free_shader_desc(&parser->shader_desc); -- vkd3d_free(sm6); - } - --static const struct vkd3d_shader_parser_ops sm6_parser_ops = --{ -- .parser_destroy = sm6_parser_destroy, --}; -- - static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6, const char *name) - { - size_t i; -@@ -8080,15 +10140,15 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 - return NULL; - } - --static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, -- const char *source_name, struct 
vkd3d_shader_message_context *message_context) -+static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, const char *source_name, -+ struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) - { -- const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; -- const struct shader_signature *input_signature = &sm6->p.shader_desc.input_signature; -+ size_t count, length, function_count, expected_function_count, byte_code_size = dxbc_desc->byte_code_size; -+ struct shader_signature *patch_constant_signature, *output_signature, *input_signature; - const struct vkd3d_shader_location location = {.source_name = source_name}; - uint32_t version_token, dxil_version, token_count, magic; -+ const uint32_t *byte_code = dxbc_desc->byte_code; - unsigned int chunk_offset, chunk_size; -- size_t count, length, function_count; - enum bitcode_block_abbreviation abbr; - struct vkd3d_shader_version version; - struct dxil_block *block; -@@ -8176,11 +10236,20 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - - /* Estimate instruction count to avoid reallocation in most shaders. */ - count = max(token_count, 400) - 400; -- vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &sm6_parser_ops, -- (count + (count >> 2)) / 2u + 10); -+ if (!vsir_program_init(program, &version, (count + (count >> 2)) / 2u + 10)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ vkd3d_shader_parser_init(&sm6->p, program, message_context, source_name); - sm6->ptr = &sm6->start[1]; - sm6->bitpos = 2; - -+ input_signature = &program->input_signature; -+ output_signature = &program->output_signature; -+ patch_constant_signature = &program->patch_constant_signature; -+ *input_signature = dxbc_desc->input_signature; -+ *output_signature = dxbc_desc->output_signature; -+ *patch_constant_signature = dxbc_desc->patch_constant_signature; -+ memset(dxbc_desc, 0, sizeof(*dxbc_desc)); -+ - block = &sm6->root_block; - if ((ret = dxil_block_init(block, NULL, sm6)) < 0) - { -@@ -8192,7 +10261,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - "DXIL bitcode chunk has invalid bitcode."); - else - vkd3d_unreachable(); -- return ret; -+ goto fail; - } - - dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); -@@ -8225,7 +10294,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - "DXIL type table is invalid."); - else - vkd3d_unreachable(); -- return ret; -+ goto fail; - } - - if ((ret = sm6_parser_symtab_init(sm6)) < 0) -@@ -8238,16 +10307,19 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - "DXIL value symbol table is invalid."); - else - vkd3d_unreachable(); -- return ret; -+ goto fail; - } - -- if (!(sm6->output_params = vsir_program_get_dst_params(&sm6->p.program, output_signature->element_count)) -- || !(sm6->input_params = vsir_program_get_dst_params(&sm6->p.program, input_signature->element_count))) -+ if (!(sm6->output_params = vsir_program_get_dst_params(program, output_signature->element_count)) -+ || !(sm6->input_params = vsir_program_get_dst_params(program, input_signature->element_count)) -+ || !(sm6->patch_constant_params = vsir_program_get_dst_params(program, -+ patch_constant_signature->element_count))) - { - ERR("Failed to allocate input/output parameters.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Out of memory allocating input/output 
parameters."); -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto fail; - } - - function_count = dxil_block_compute_function_count(&sm6->root_block); -@@ -8256,7 +10328,8 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - ERR("Failed to allocate function array.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Out of memory allocating DXIL function array."); -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto fail; - } - - if (sm6_parser_compute_max_value_count(sm6, &sm6->root_block, 0) == SIZE_MAX) -@@ -8264,27 +10337,31 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - WARN("Value array count overflowed.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, - "Overflow occurred in the DXIL module value count."); -- return VKD3D_ERROR_INVALID_SHADER; -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; - } - if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values)))) - { - ERR("Failed to allocate value array.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Out of memory allocating DXIL value array."); -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto fail; - } -+ sm6->function_count = 0; - sm6->ssa_next_id = 1; - - if ((ret = sm6_parser_globals_init(sm6)) < 0) - { - WARN("Failed to load global declarations.\n"); -- return ret; -+ goto fail; - } - - if (!sm6_parser_allocate_named_metadata(sm6)) - { - ERR("Failed to allocate named metadata array.\n"); -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto fail; - } - - for (i = 0, j = 0; i < sm6->root_block.child_block_count; ++i) -@@ -8298,18 +10375,19 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - FIXME("Too many metadata tables.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, - "A metadata table count greater than %zu is unsupported.", ARRAY_SIZE(sm6->metadata_tables)); -- return VKD3D_ERROR_INVALID_SHADER; -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; - } - - if ((ret = sm6_parser_metadata_init(sm6, block, &sm6->metadata_tables[j++])) < 0) -- return ret; -+ goto fail; - } - - if ((ret = sm6_parser_entry_point_init(sm6)) < 0) -- return ret; -+ goto fail; - - if ((ret = sm6_parser_resources_init(sm6)) < 0) -- return ret; -+ goto fail; - - if ((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) - { -@@ -8319,92 +10397,124 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - else if (ret == VKD3D_ERROR_INVALID_SHADER) - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, - "DXIL module is invalid."); -- return ret; -+ goto fail; - } - -- if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count)) -+ if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count -+ + patch_constant_signature->element_count)) - { - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Out of memory emitting shader signature declarations."); -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto fail; - } - -- sm6->p.program.ssa_count = sm6->ssa_next_id; -+ program->ssa_count = sm6->ssa_next_id; - - if (!(fn = sm6_parser_get_function(sm6, sm6->entry_point))) - { - WARN("Failed to find entry point 
%s.\n", sm6->entry_point); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_ENTRY_POINT, - "The definition of the entry point function '%s' was not found.", sm6->entry_point); -- return VKD3D_ERROR_INVALID_SHADER; -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; - } - -- assert(sm6->function_count == 1); -- if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) -- return ret; -+ if (version.type == VKD3D_SHADER_TYPE_HULL) -+ { -+ sm6_parser_add_instruction(sm6, VKD3DSIH_HS_CONTROL_POINT_PHASE); -+ -+ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) -+ goto fail; -+ -+ if (!(fn = sm6_parser_get_function(sm6, sm6->patch_constant_function))) -+ { -+ WARN("Failed to find patch constant function '%s'.\n", sm6->patch_constant_function); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "Failed to find the patch constant function '%s' for a hull shader.", -+ sm6->patch_constant_function); -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; -+ } -+ -+ sm6_parser_add_instruction(sm6, VKD3DSIH_HS_FORK_PHASE); -+ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) -+ goto fail; -+ -+ expected_function_count = 2; -+ } -+ else -+ { -+ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) -+ goto fail; -+ expected_function_count = 1; -+ } -+ -+ if (sm6->function_count > expected_function_count) -+ { -+ FIXME("%zu unhandled functions.\n", sm6->function_count - expected_function_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "%zu functions were not emitted.", sm6->function_count - expected_function_count); -+ } - - dxil_block_destroy(&sm6->root_block); - - return VKD3D_OK; -+ -+fail: -+ vsir_program_cleanup(program); -+ return ret; - } - --int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) -+int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, -+ struct vkd3d_shader_message_context *message_context, struct vsir_program *program) - { -- struct vkd3d_shader_desc *shader_desc; -+ struct dxbc_shader_desc dxbc_desc = {0}; -+ struct sm6_parser sm6 = {0}; - uint32_t *byte_code = NULL; -- struct sm6_parser *sm6; - int ret; - - ERR("Creating a DXIL parser. This is unsupported; you get to keep all the pieces if it breaks.\n"); - -- if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6)))) -- { -- ERR("Failed to allocate parser.\n"); -- return VKD3D_ERROR_OUT_OF_MEMORY; -- } -- -- shader_desc = &sm6->p.shader_desc; -- shader_desc->is_dxil = true; -+ dxbc_desc.is_dxil = true; - if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, -- shader_desc)) < 0) -+ &dxbc_desc)) < 0) - { - WARN("Failed to extract shader, vkd3d result %d.\n", ret); -- vkd3d_free(sm6); - return ret; - } - -- sm6->p.shader_desc = *shader_desc; -- shader_desc = &sm6->p.shader_desc; -- -- if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) -+ if (((uintptr_t)dxbc_desc.byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) - { - /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC - * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ -- if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) -- ERR("Failed to allocate aligned chunk. 
Unaligned access will occur.\n"); -- else -- memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); -+ if (!(byte_code = vkd3d_malloc(align(dxbc_desc.byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) -+ { -+ ERR("Failed to allocate aligned chunk.\n"); -+ free_dxbc_shader_desc(&dxbc_desc); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ memcpy(byte_code, dxbc_desc.byte_code, dxbc_desc.byte_code_size); -+ dxbc_desc.byte_code = byte_code; - } - -- ret = sm6_parser_init(sm6, byte_code ? byte_code : shader_desc->byte_code, shader_desc->byte_code_size, -- compile_info->source_name, message_context); -+ ret = sm6_parser_init(&sm6, program, compile_info->source_name, message_context, &dxbc_desc); -+ free_dxbc_shader_desc(&dxbc_desc); - vkd3d_free(byte_code); - -- if (!sm6->p.failed && ret >= 0) -- ret = vsir_validate(&sm6->p); -+ if (!sm6.p.failed && ret >= 0) -+ ret = vkd3d_shader_parser_validate(&sm6.p, config_flags); - -- if (sm6->p.failed && ret >= 0) -+ if (sm6.p.failed && ret >= 0) - ret = VKD3D_ERROR_INVALID_SHADER; - -+ sm6_parser_cleanup(&sm6); - if (ret < 0) - { -- WARN("Failed to initialise shader parser.\n"); -- sm6_parser_destroy(&sm6->p); -+ WARN("Failed to parse shader.\n"); - return ret; - } - -- *parser = &sm6->p; -- - return ret; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index bc70d5220fd..57b4ac24212 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -61,9 +61,9 @@ struct fx_write_context; - struct fx_write_context_ops - { - uint32_t (*write_string)(const char *string, struct fx_write_context *fx); -- uint32_t (*write_type)(const struct hlsl_type *type, struct fx_write_context *fx); - void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); - void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); -+ bool are_child_effects_supported; - }; - - struct fx_write_context -@@ -82,10 +82,23 @@ struct fx_write_context - uint32_t technique_count; - uint32_t group_count; - uint32_t buffer_count; -+ uint32_t shared_buffer_count; - uint32_t numeric_variable_count; -+ uint32_t shared_numeric_variable_count; - uint32_t object_variable_count; -+ uint32_t shared_object_count; -+ uint32_t shader_count; -+ uint32_t parameter_count; -+ uint32_t dsv_count; -+ uint32_t rtv_count; -+ uint32_t texture_count; -+ uint32_t uav_count; -+ uint32_t sampler_state_count; - int status; - -+ bool child_effect; -+ bool include_empty_buffers; -+ - const struct fx_write_context_ops *ops; - }; - -@@ -97,6 +110,11 @@ static void set_status(struct fx_write_context *fx, int status) - fx->status = status; - } - -+static bool has_annotations(const struct hlsl_ir_var *var) -+{ -+ return var->annotations && !list_empty(&var->annotations->vars); -+} -+ - static uint32_t write_string(const char *string, struct fx_write_context *fx) - { - return fx->ops->write_string(string, fx); -@@ -104,26 +122,37 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) - - static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) - { -+ if (var->state_block_count) -+ hlsl_fixme(fx->ctx, &var->loc, "Write state block assignments."); -+ - fx->ops->write_pass(var, fx); - } - -+static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); -+static const char * get_fx_4_type_name(const struct hlsl_type *type); -+ - static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) - { -+ const struct hlsl_type 
*element_type; - struct type_entry *type_entry; - unsigned int elements_count; - const char *name; - -+ assert(fx->ctx->profile->major_version >= 4); -+ - if (type->class == HLSL_CLASS_ARRAY) - { -- name = hlsl_get_multiarray_element_type(type)->name; - elements_count = hlsl_get_multiarray_size(type); -+ element_type = hlsl_get_multiarray_element_type(type); - } - else - { -- name = type->name; - elements_count = 0; -+ element_type = type; - } - -+ name = get_fx_4_type_name(element_type); -+ - LIST_FOR_EACH_ENTRY(type_entry, &fx->types, struct type_entry, entry) - { - if (strcmp(type_entry->name, name)) -@@ -138,7 +167,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context - if (!(type_entry = hlsl_alloc(fx->ctx, sizeof(*type_entry)))) - return 0; - -- type_entry->offset = fx->ops->write_type(type, fx); -+ type_entry->offset = write_fx_4_type(type, fx); - type_entry->name = name; - type_entry->elements_count = elements_count; - -@@ -151,6 +180,7 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co - struct fx_write_context *fx) - { - unsigned int version = ctx->profile->major_version; -+ struct hlsl_ir_var *var; - - memset(fx, 0, sizeof(*fx)); - -@@ -174,12 +204,25 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co - - rb_init(&fx->strings, string_storage_compare); - list_init(&fx->types); -+ -+ fx->child_effect = fx->ops->are_child_effects_supported && ctx->child_effect; -+ fx->include_empty_buffers = version == 4 && ctx->include_empty_buffers; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -+ { -+ list_add_tail(&ctx->extern_vars, &var->extern_entry); -+ var->is_uniform = 1; -+ } -+ } -+ -+ hlsl_calculate_buffer_offsets(fx->ctx); - } - - static int fx_write_context_cleanup(struct fx_write_context *fx) - { - struct type_entry *type, *next_type; -- int status = fx->status; - - rb_destroy(&fx->strings, string_storage_destroy, NULL); - -@@ -189,14 +232,14 @@ static int fx_write_context_cleanup(struct fx_write_context *fx) - vkd3d_free(type); - } - -- return status; -+ return fx->ctx->result; - } - - static bool technique_matches_version(const struct hlsl_ir_var *var, const struct fx_write_context *fx) - { - const struct hlsl_type *type = var->data_type; - -- if (type->base_type != HLSL_TYPE_TECHNIQUE) -+ if (type->class != HLSL_CLASS_TECHNIQUE) - return false; - - return type->e.version >= fx->min_technique_version && type->e.version <= fx->max_technique_version; -@@ -266,6 +309,14 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type) - return type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float) * elements_count; - } - -+static const uint32_t fx_4_numeric_base_type[] = -+{ -+ [HLSL_TYPE_FLOAT] = 1, -+ [HLSL_TYPE_INT ] = 2, -+ [HLSL_TYPE_UINT ] = 3, -+ [HLSL_TYPE_BOOL ] = 4, -+}; -+ - static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, struct fx_write_context *fx) - { - static const unsigned int NUMERIC_BASE_TYPE_SHIFT = 3; -@@ -278,13 +329,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - [HLSL_CLASS_VECTOR] = 2, - [HLSL_CLASS_MATRIX] = 3, - }; -- static const uint32_t numeric_base_type[] = -- { -- [HLSL_TYPE_FLOAT] = 1, -- [HLSL_TYPE_INT ] = 2, -- [HLSL_TYPE_UINT ] = 3, -- [HLSL_TYPE_BOOL ] = 4, -- }; -+ struct hlsl_ctx *ctx = fx->ctx; - uint32_t value = 0; - - switch (type->class) -@@ -295,22 +340,20 @@ static uint32_t 
get_fx_4_numeric_type_description(const struct hlsl_type *type, - value |= numeric_type_class[type->class]; - break; - default: -- FIXME("Unexpected type class %u.\n", type->class); -- set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -+ hlsl_fixme(ctx, &ctx->location, "Not implemented for type class %u.", type->class); - return 0; - } - -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: -- value |= (numeric_base_type[type->base_type] << NUMERIC_BASE_TYPE_SHIFT); -+ value |= (fx_4_numeric_base_type[type->e.numeric.type] << NUMERIC_BASE_TYPE_SHIFT); - break; - default: -- FIXME("Unexpected base type %u.\n", type->base_type); -- set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -+ hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->e.numeric.type); - return 0; - } - -@@ -322,20 +365,8 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - return value; - } - --static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) -+static const char * get_fx_4_type_name(const struct hlsl_type *type) - { -- struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -- uint32_t name_offset, offset, size, stride, numeric_desc; -- uint32_t elements_count = 0; -- const char *name; -- static const uint32_t variable_type[] = -- { -- [HLSL_CLASS_SCALAR] = 1, -- [HLSL_CLASS_VECTOR] = 1, -- [HLSL_CLASS_MATRIX] = 1, -- [HLSL_CLASS_OBJECT] = 2, -- [HLSL_CLASS_STRUCT] = 3, -- }; - static const char * const texture_type_names[] = - { - [HLSL_SAMPLER_DIM_GENERIC] = "texture", -@@ -360,6 +391,41 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer", - }; - -+ switch (type->class) -+ { -+ case HLSL_CLASS_SAMPLER: -+ return "SamplerState"; -+ -+ case HLSL_CLASS_TEXTURE: -+ return texture_type_names[type->sampler_dim]; -+ -+ case HLSL_CLASS_UAV: -+ return uav_type_names[type->sampler_dim]; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ return "DepthStencilView"; -+ -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ return "RenderTargetView"; -+ -+ case HLSL_CLASS_VERTEX_SHADER: -+ return "VertexShader"; -+ -+ case HLSL_CLASS_PIXEL_SHADER: -+ return "PixelShader"; -+ -+ default: -+ return type->name; -+ } -+} -+ -+static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ uint32_t name_offset, offset, size, stride, numeric_desc; -+ uint32_t elements_count = 0; -+ const char *name; -+ - /* Resolve arrays to element type and number of elements. 
*/ - if (type->class == HLSL_CLASS_ARRAY) - { -@@ -367,12 +433,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - type = hlsl_get_multiarray_element_type(type); - } - -- if (type->base_type == HLSL_TYPE_TEXTURE) -- name = texture_type_names[type->sampler_dim]; -- else if (type->base_type == HLSL_TYPE_UAV) -- name = uav_type_names[type->sampler_dim]; -- else -- name = type->name; -+ name = get_fx_4_type_name(type); - - name_offset = write_string(name, fx); - offset = put_u32_unaligned(buffer, name_offset); -@@ -382,11 +443,31 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: -- case HLSL_CLASS_OBJECT: -+ put_u32_unaligned(buffer, 1); -+ break; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: -+ put_u32_unaligned(buffer, 2); -+ break; -+ - case HLSL_CLASS_STRUCT: -- put_u32_unaligned(buffer, variable_type[type->class]); -+ put_u32_unaligned(buffer, 3); - break; -- default: -+ -+ case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_TECHNIQUE: -+ vkd3d_unreachable(); -+ -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_VOID: - FIXME("Writing type class %u is not implemented.\n", type->class); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); - return 0; -@@ -422,13 +503,8 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - put_u32_unaligned(buffer, field_type_offset); - } - } -- else if (type->class == HLSL_CLASS_OBJECT) -+ else if (type->class == HLSL_CLASS_TEXTURE) - { -- static const uint32_t object_type[] = -- { -- [HLSL_TYPE_RENDERTARGETVIEW] = 19, -- [HLSL_TYPE_DEPTHSTENCILVIEW] = 20, -- }; - static const uint32_t texture_type[] = - { - [HLSL_SAMPLER_DIM_GENERIC] = 9, -@@ -442,6 +518,15 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - [HLSL_SAMPLER_DIM_CUBE] = 17, - [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, - }; -+ -+ put_u32_unaligned(buffer, texture_type[type->sampler_dim]); -+ } -+ else if (type->class == HLSL_CLASS_SAMPLER) -+ { -+ put_u32_unaligned(buffer, 21); -+ } -+ else if (type->class == HLSL_CLASS_UAV) -+ { - static const uint32_t uav_type[] = - { - [HLSL_SAMPLER_DIM_1D] = 31, -@@ -453,29 +538,35 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, - }; - -- switch (type->base_type) -- { -- case HLSL_TYPE_DEPTHSTENCILVIEW: -- case HLSL_TYPE_RENDERTARGETVIEW: -- put_u32_unaligned(buffer, object_type[type->base_type]); -- break; -- case HLSL_TYPE_TEXTURE: -- put_u32_unaligned(buffer, texture_type[type->sampler_dim]); -- break; -- case HLSL_TYPE_UAV: -- put_u32_unaligned(buffer, uav_type[type->sampler_dim]); -- break; -- default: -- FIXME("Object type %u is not supported.\n", type->base_type); -- set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -- return 0; -- } -+ put_u32_unaligned(buffer, uav_type[type->sampler_dim]); -+ } -+ else if (type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) -+ { -+ put_u32_unaligned(buffer, 20); -+ } -+ else if (type->class == HLSL_CLASS_RENDER_TARGET_VIEW) -+ { -+ put_u32_unaligned(buffer, 19); -+ } -+ else if (type->class == HLSL_CLASS_PIXEL_SHADER) -+ { -+ put_u32_unaligned(buffer, 5); -+ } -+ else if (type->class == HLSL_CLASS_VERTEX_SHADER) -+ { -+ 
put_u32_unaligned(buffer, 6); - } -- else /* Numeric type */ -+ else if (hlsl_is_numeric_type(type)) - { - numeric_desc = get_fx_4_numeric_type_description(type, fx); - put_u32_unaligned(buffer, numeric_desc); - } -+ else -+ { -+ FIXME("Type %u is not supported.\n", type->class); -+ set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -+ return 0; -+ } - - return offset; - } -@@ -556,7 +647,7 @@ static void write_groups(struct fx_write_context *fx) - { - const struct hlsl_type *type = var->data_type; - -- if (type->base_type == HLSL_TYPE_EFFECT_GROUP) -+ if (type->class == HLSL_CLASS_EFFECT_GROUP) - write_group(var, fx); - } - } -@@ -565,11 +656,71 @@ static uint32_t write_fx_2_string(const char *string, struct fx_write_context *f - { - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; - const char *s = string ? string : ""; -+ static const char tail[3]; - uint32_t size, offset; - - size = strlen(s) + 1; - offset = put_u32(buffer, size); - bytecode_put_bytes(buffer, s, size); -+ size %= 4; -+ if (size) -+ bytecode_put_bytes_unaligned(buffer, tail, 4 - size); -+ return offset; -+} -+ -+static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic, -+ struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ uint32_t semantic_offset, offset, elements_count = 0, name_offset; -+ size_t i; -+ -+ /* Resolve arrays to element type and number of elements. */ -+ if (type->class == HLSL_CLASS_ARRAY) -+ { -+ elements_count = hlsl_get_multiarray_size(type); -+ type = hlsl_get_multiarray_element_type(type); -+ } -+ -+ name_offset = write_string(name, fx); -+ semantic_offset = write_string(semantic->name, fx); -+ -+ offset = put_u32(buffer, hlsl_sm1_base_type(type)); -+ put_u32(buffer, hlsl_sm1_class(type)); -+ put_u32(buffer, name_offset); -+ put_u32(buffer, semantic_offset); -+ put_u32(buffer, elements_count); -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_VECTOR: -+ put_u32(buffer, type->dimx); -+ put_u32(buffer, type->dimy); -+ break; -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_MATRIX: -+ put_u32(buffer, type->dimy); -+ put_u32(buffer, type->dimx); -+ break; -+ case HLSL_CLASS_STRUCT: -+ put_u32(buffer, type->e.record.field_count); -+ break; -+ default: -+ ; -+ } -+ -+ if (type->class == HLSL_CLASS_STRUCT) -+ { -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ const struct hlsl_struct_field *field = &type->e.record.fields[i]; -+ -+ /* Validated in check_invalid_object_fields(). 
*/ -+ assert(hlsl_is_numeric_type(field->type)); -+ write_fx_2_parameter(field->type, field->name, &field->semantic, fx); -+ } -+ } -+ - return offset; - } - -@@ -595,6 +746,161 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex - set_u32(buffer, count_offset, count); - } - -+static uint32_t get_fx_2_type_size(const struct hlsl_type *type) -+{ -+ uint32_t size = 0, elements_count; -+ size_t i; -+ -+ if (type->class == HLSL_CLASS_ARRAY) -+ { -+ elements_count = hlsl_get_multiarray_size(type); -+ type = hlsl_get_multiarray_element_type(type); -+ return get_fx_2_type_size(type) * elements_count; -+ } -+ else if (type->class == HLSL_CLASS_STRUCT) -+ { -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ const struct hlsl_struct_field *field = &type->e.record.fields[i]; -+ size += get_fx_2_type_size(field->type); -+ } -+ -+ return size; -+ } -+ -+ return type->dimx * type->dimy * sizeof(float); -+} -+ -+static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ const struct hlsl_type *type = var->data_type; -+ uint32_t offset, size, elements_count = 1; -+ -+ size = get_fx_2_type_size(type); -+ -+ if (type->class == HLSL_CLASS_ARRAY) -+ { -+ elements_count = hlsl_get_multiarray_size(type); -+ type = hlsl_get_multiarray_element_type(type); -+ } -+ -+ /* Note that struct fields must all be numeric; -+ * this was validated in check_invalid_object_fields(). */ -+ switch (type->class) -+ { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ case HLSL_CLASS_STRUCT: -+ /* FIXME: write actual initial value */ -+ offset = put_u32(buffer, 0); -+ -+ for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) -+ put_u32(buffer, 0); -+ break; -+ -+ default: -+ /* Objects are given sequential ids. */ -+ offset = put_u32(buffer, fx->object_variable_count++); -+ for (uint32_t i = 1; i < elements_count; ++i) -+ put_u32(buffer, fx->object_variable_count++); -+ break; -+ } -+ -+ return offset; -+} -+ -+static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type *type, -+ const struct vkd3d_shader_location *loc) -+{ -+ switch (type->class) -+ { -+ case HLSL_CLASS_STRUCT: -+ /* Note that the fields must all be numeric; this was validated in -+ * check_invalid_object_fields(). */ -+ return true; -+ -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ return true; -+ -+ case HLSL_CLASS_ARRAY: -+ return is_type_supported_fx_2(ctx, type->e.array.type, loc); -+ -+ case HLSL_CLASS_TEXTURE: -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ case HLSL_SAMPLER_DIM_2D: -+ case HLSL_SAMPLER_DIM_3D: -+ case HLSL_SAMPLER_DIM_CUBE: -+ case HLSL_SAMPLER_DIM_GENERIC: -+ return true; -+ default: -+ return false; -+ } -+ break; -+ -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_VERTEX_SHADER: -+ hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); -+ return false; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_VOID: -+ return false; -+ -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_TECHNIQUE: -+ /* This cannot appear as an extern variable. 
*/ -+ break; -+ } -+ -+ vkd3d_unreachable(); -+} -+ -+static void write_fx_2_parameters(struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t desc_offset, value_offset, flags; -+ struct hlsl_ctx *ctx = fx->ctx; -+ struct hlsl_ir_var *var; -+ enum fx_2_parameter_flags -+ { -+ IS_SHARED = 0x1, -+ }; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc)) -+ continue; -+ -+ desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); -+ value_offset = write_fx_2_initial_value(var, fx); -+ -+ flags = 0; -+ if (var->storage_modifiers & HLSL_STORAGE_SHARED) -+ flags |= IS_SHARED; -+ -+ put_u32(buffer, desc_offset); /* Parameter description */ -+ put_u32(buffer, value_offset); /* Value */ -+ put_u32(buffer, flags); /* Flags */ -+ -+ put_u32(buffer, 0); /* Annotations count */ -+ if (has_annotations(var)) -+ hlsl_fixme(ctx, &ctx->location, "Writing annotations for parameters is not implemented."); -+ -+ ++fx->parameter_count; -+ } -+} -+ - static const struct fx_write_context_ops fx_2_ops = - { - .write_string = write_fx_2_string, -@@ -604,12 +910,13 @@ static const struct fx_write_context_ops fx_2_ops = - - static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - { -+ uint32_t offset, size, technique_count, parameter_count, object_count; - struct vkd3d_bytecode_buffer buffer = { 0 }; - struct vkd3d_bytecode_buffer *structured; -- uint32_t offset, size, technique_count; - struct fx_write_context fx; - - fx_write_context_init(ctx, &fx_2_ops, &fx); -+ fx.object_variable_count = 1; - structured = &fx.structured; - - /* First entry is always zeroed and skipped. */ -@@ -618,12 +925,14 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, 0xfeff0901); /* Version. 
*/ - offset = put_u32(&buffer, 0); - -- put_u32(structured, 0); /* Parameter count */ -+ parameter_count = put_u32(structured, 0); /* Parameter count */ - technique_count = put_u32(structured, 0); - put_u32(structured, 0); /* Unknown */ -- put_u32(structured, 0); /* Object count */ -+ object_count = put_u32(structured, 0); - -- /* TODO: parameters */ -+ write_fx_2_parameters(&fx); -+ set_u32(structured, parameter_count, fx.parameter_count); -+ set_u32(structured, object_count, fx.object_variable_count); - - write_techniques(ctx->globals, &fx); - set_u32(structured, technique_count, fx.technique_count); -@@ -643,35 +952,39 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - vkd3d_free(fx.unstructured.data); - vkd3d_free(fx.structured.data); - -- if (!fx.status) -+ if (!fx.technique_count) -+ hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE, "No techniques found."); -+ -+ if (fx.status < 0) -+ ctx->result = fx.status; -+ -+ if (!ctx->result) - { - out->code = buffer.data; - out->size = buffer.size; - } - -- if (fx.status < 0) -- ctx->result = fx.status; -- - return fx_write_context_cleanup(&fx); - } - - static const struct fx_write_context_ops fx_4_ops = - { - .write_string = write_fx_4_string, -- .write_type = write_fx_4_type, - .write_technique = write_fx_4_technique, - .write_pass = write_fx_4_pass, -+ .are_child_effects_supported = true, - }; - --static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) -+static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, struct fx_write_context *fx) - { - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t name_offset, type_offset, value_offset; - uint32_t semantic_offset, flags = 0; -- uint32_t name_offset, type_offset; - enum fx_4_numeric_variable_flags - { - HAS_EXPLICIT_BIND_POINT = 0x4, - }; -+ struct hlsl_ctx *ctx = fx->ctx; - - /* Explicit bind point. 
*/ - if (var->reg_reservation.reg_type) -@@ -686,18 +999,345 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write - - semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ - put_u32(buffer, var->buffer_offset); /* Offset in the constant buffer */ -- put_u32(buffer, 0); /* FIXME: default value offset */ -+ value_offset = put_u32(buffer, 0); /* Default value offset */ - put_u32(buffer, flags); /* Flags */ - -- put_u32(buffer, 0); /* Annotations count */ -- /* FIXME: write annotations */ -+ if (shared) -+ { -+ fx->shared_numeric_variable_count++; -+ } -+ else -+ { -+ /* FIXME: write default value */ -+ set_u32(buffer, value_offset, 0); -+ -+ put_u32(buffer, 0); /* Annotations count */ -+ if (has_annotations(var)) -+ hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); -+ -+ fx->numeric_variable_count++; -+ } -+} -+ -+struct rhs_named_value -+{ -+ const char *name; -+ unsigned int value; -+}; -+ -+static bool get_fx_4_state_enum_value(const struct rhs_named_value *pairs, -+ const char *name, unsigned int *value) -+{ -+ while (pairs->name) -+ { -+ if (!ascii_strcasecmp(pairs->name, name)) -+ { -+ *value = pairs->value; -+ return true; -+ } -+ -+ pairs++; -+ } -+ -+ return false; -+} -+ -+static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ struct hlsl_type *data_type = value->node.data_type; -+ struct hlsl_ctx *ctx = fx->ctx; -+ uint32_t i, type, offset; -+ unsigned int count = hlsl_type_component_count(data_type); -+ -+ offset = put_u32_unaligned(buffer, count); -+ -+ for (i = 0; i < count; ++i) -+ { -+ if (hlsl_is_numeric_type(data_type)) -+ { -+ switch (data_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ type = fx_4_numeric_base_type[data_type->e.numeric.type]; -+ break; -+ default: -+ type = 0; -+ hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value type %u.", data_type->e.numeric.type); -+ } -+ } -+ -+ put_u32_unaligned(buffer, type); -+ put_u32_unaligned(buffer, value->value.u[i].u); -+ } -+ -+ return offset; -+} -+ -+static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, -+ struct fx_write_context *fx) -+{ -+ uint32_t value_offset = 0, assignment_type = 0, rhs_offset; -+ uint32_t type_offset; -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ struct hlsl_ctx *ctx = fx->ctx; -+ struct hlsl_ir_node *value = entry->args->node; -+ -+ if (entry->lhs_has_index) -+ hlsl_fixme(ctx, &var->loc, "Unsupported assignment to array element."); -+ -+ put_u32(buffer, entry->name_id); -+ put_u32(buffer, 0); /* TODO: destination index */ -+ type_offset = put_u32(buffer, 0); -+ rhs_offset = put_u32(buffer, 0); -+ -+ switch (value->type) -+ { -+ case HLSL_IR_CONSTANT: -+ { -+ struct hlsl_ir_constant *c = hlsl_ir_constant(value); -+ -+ value_offset = write_fx_4_state_numeric_value(c, fx); -+ assignment_type = 1; -+ break; -+ } -+ default: -+ hlsl_fixme(ctx, &var->loc, "Unsupported assignment type for state %s.", entry->name); -+ } -+ -+ set_u32(buffer, type_offset, assignment_type); -+ set_u32(buffer, rhs_offset, value_offset); -+} -+ -+static bool state_block_contains_state(const char *name, unsigned int start, struct hlsl_state_block *block) -+{ -+ unsigned int i; -+ -+ for (i = start; i < block->count; ++i) -+ { -+ if 
(!ascii_strcasecmp(block->entries[i]->name, name)) -+ return true; -+ } -+ -+ return false; -+} -+ -+struct replace_state_context -+{ -+ const struct rhs_named_value *values; -+ struct hlsl_ir_var *var; -+}; -+ -+static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct replace_state_context *replace_context = context; -+ struct hlsl_ir_stateblock_constant *state_constant; -+ struct hlsl_ir_node *c; -+ unsigned int value; -+ -+ if (!replace_context->values) -+ return false; -+ if (instr->type != HLSL_IR_STATEBLOCK_CONSTANT) -+ return false; -+ -+ state_constant = hlsl_ir_stateblock_constant(instr); -+ if (!get_fx_4_state_enum_value(replace_context->values, state_constant->name, &value)) -+ { -+ hlsl_error(ctx, &replace_context->var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Unrecognized state constant %s.", state_constant->name); -+ return false; -+ } -+ -+ if (!(c = hlsl_new_uint_constant(ctx, value, &replace_context->var->loc))) -+ return false; -+ -+ list_add_before(&state_constant->node.entry, &c->entry); -+ hlsl_replace_node(&state_constant->node, c); -+ -+ return true; -+} -+ -+static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, -+ struct fx_write_context *fx) -+{ -+ static const struct rhs_named_value filter_values[] = -+ { -+ { "MIN_MAG_MIP_POINT", 0x00 }, -+ { "MIN_MAG_POINT_MIP_LINEAR", 0x01 }, -+ { "MIN_POINT_MAG_LINEAR_MIP_POINT", 0x04 }, -+ { "MIN_POINT_MAG_MIP_LINEAR", 0x05 }, -+ { "MIN_LINEAR_MAG_MIP_POINT", 0x10 }, -+ { "MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x11 }, -+ { "MIN_MAG_LINEAR_MIP_POINT", 0x14 }, -+ { "MIN_MAG_MIP_LINEAR", 0x15 }, -+ { "ANISOTROPIC", 0x55 }, -+ { "COMPARISON_MIN_MAG_MIP_POINT", 0x80 }, -+ { "COMPARISON_MIN_MAG_POINT_MIP_LINEAR", 0x81 }, -+ { "COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT", 0x84 }, -+ { "COMPARISON_MIN_POINT_MAG_MIP_LINEAR", 0x85 }, -+ { "COMPARISON_MIN_LINEAR_MAG_MIP_POINT", 0x90 }, -+ { "COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x91 }, -+ { "COMPARISON_MIN_MAG_LINEAR_MIP_POINT", 0x94 }, -+ { "COMPARISON_MIN_MAG_MIP_LINEAR", 0x95 }, -+ { "COMPARISON_ANISOTROPIC", 0xd5 }, -+ { NULL }, -+ }; -+ -+ static const struct rhs_named_value address_values[] = -+ { -+ { "WRAP", 1 }, -+ { "MIRROR", 2 }, -+ { "CLAMP", 3 }, -+ { "BORDER", 4 }, -+ { "MIRROR_ONCE", 5 }, -+ { NULL }, -+ }; -+ -+ static const struct rhs_named_value compare_func_values[] = -+ { -+ { "NEVER", 1 }, -+ { "LESS", 2 }, -+ { "EQUAL", 3 }, -+ { "LESS_EQUAL", 4 }, -+ { "GREATER", 5 }, -+ { "NOT_EQUAL", 6 }, -+ { "GREATER_EQUAL", 7 }, -+ { "ALWAYS", 8 }, -+ { NULL } -+ }; -+ -+ static const struct state -+ { -+ const char *name; -+ enum hlsl_type_class container; -+ enum hlsl_base_type type; -+ unsigned int dimx; -+ uint32_t id; -+ const struct rhs_named_value *values; -+ } -+ states[] = -+ { -+ { "Filter", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 45, filter_values }, -+ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 46, address_values }, -+ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 47, address_values }, -+ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 48, address_values }, -+ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 49 }, -+ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 50 }, -+ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 51, compare_func_values }, -+ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 4, 52 }, -+ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 53 }, -+ { "MaxLOD", 
HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 54 }, -+ /* TODO: "Texture" field */ -+ }; -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); -+ struct replace_state_context replace_context; -+ struct hlsl_ir_node *node, *cast; -+ const struct state *state = NULL; -+ struct hlsl_ctx *ctx = fx->ctx; -+ struct hlsl_type *state_type; -+ unsigned int i; -+ bool progress; -+ -+ for (i = 0; i < ARRAY_SIZE(states); ++i) -+ { -+ if (type->class == states[i].container -+ && !ascii_strcasecmp(entry->name, states[i].name)) -+ { -+ state = &states[i]; -+ break; -+ } -+ } -+ -+ if (!state) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unrecognized state name %s.", entry->name); -+ return; -+ } -+ -+ if (entry->args_count != 1) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unrecognized initializer for the state %s.", -+ entry->name); -+ return; -+ } -+ -+ entry->name_id = state->id; -+ -+ replace_context.values = state->values; -+ replace_context.var = var; -+ -+ /* Turned named constants to actual constants. */ -+ hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); -+ -+ if (state->dimx) -+ state_type = hlsl_get_vector_type(ctx, state->type, state->dimx); -+ else -+ state_type = hlsl_get_scalar_type(ctx, state->type); -+ -+ /* Cast to expected property type. */ -+ node = entry->args->node; -+ if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) -+ return; -+ list_add_after(&node->entry, &cast->entry); -+ -+ hlsl_src_remove(entry->args); -+ hlsl_src_from_node(entry->args, cast); -+ -+ do -+ { -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); -+ } while (progress); -+} -+ -+static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i, j; -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t count_offset, count; -+ -+ for (i = 0; i < elements_count; ++i) -+ { -+ struct hlsl_state_block *block; -+ -+ count_offset = put_u32(buffer, 0); -+ -+ count = 0; -+ if (var->state_blocks) -+ { -+ block = var->state_blocks[i]; -+ -+ for (j = 0; j < block->count; ++j) -+ { -+ struct hlsl_state_block_entry *entry = block->entries[j]; -+ -+ /* Skip if property is reassigned later. This will use the last assignment. */ -+ if (state_block_contains_state(entry->name, j + 1, block)) -+ continue; -+ -+ /* Resolve special constant names and property names. 
*/ -+ resolve_fx_4_state_block_values(var, entry, fx); -+ -+ write_fx_4_state_assignment(var, entry, fx); -+ ++count; -+ } -+ } -+ -+ set_u32(buffer, count_offset, count); -+ } - } - - static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) - { -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); -+ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); - struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t semantic_offset, bind_point = ~0u; -- uint32_t name_offset, type_offset; -+ uint32_t name_offset, type_offset, i; -+ struct hlsl_ctx *ctx = fx->ctx; - - if (var->reg_reservation.reg_type) - bind_point = var->reg_reservation.reg_index; -@@ -712,8 +1352,52 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ - put_u32(buffer, bind_point); /* Explicit bind point */ - -+ if (fx->child_effect && var->storage_modifiers & HLSL_STORAGE_SHARED) -+ { -+ ++fx->shared_object_count; -+ return; -+ } -+ -+ /* Initializer */ -+ switch (type->class) -+ { -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ fx->rtv_count += elements_count; -+ break; -+ case HLSL_CLASS_TEXTURE: -+ fx->texture_count += elements_count; -+ break; -+ case HLSL_CLASS_UAV: -+ fx->uav_count += elements_count; -+ break; -+ -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_VERTEX_SHADER: -+ /* FIXME: write shader blobs, once parser support works. */ -+ for (i = 0; i < elements_count; ++i) -+ put_u32(buffer, 0); -+ fx->shader_count += elements_count; -+ break; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ fx->dsv_count += elements_count; -+ break; -+ -+ case HLSL_CLASS_SAMPLER: -+ write_fx_4_state_object_initializer(var, fx); -+ fx->sampler_state_count += elements_count; -+ break; -+ -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", -+ type->e.numeric.type); -+ } -+ - put_u32(buffer, 0); /* Annotations count */ -- /* FIXME: write annotations */ -+ if (has_annotations(var)) -+ hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); -+ -+ ++fx->object_variable_count; - } - - static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx) -@@ -729,12 +1413,16 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx - struct hlsl_ctx *ctx = fx->ctx; - struct hlsl_ir_var *var; - uint32_t count_offset; -+ bool shared; -+ -+ shared = fx->child_effect && b->modifiers & HLSL_STORAGE_SHARED; - - if (b->reservation.reg_type) - bind_point = b->reservation.reg_index; - if (b->type == HLSL_BUFFER_TEXTURE) - flags |= IS_TBUFFER; -- /* FIXME: set 'single' flag for fx_5_0 */ -+ if (ctx->profile->major_version == 5 && b->modifiers & HLSL_MODIFIER_SINGLE) -+ flags |= IS_SINGLE; - - name_offset = write_string(b->name, fx); - -@@ -744,8 +1432,17 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx - count_offset = put_u32(buffer, 0); - put_u32(buffer, bind_point); /* Bind point */ - -- put_u32(buffer, 0); /* Annotations count */ -- /* FIXME: write annotations */ -+ if (shared) -+ { -+ ++fx->shared_buffer_count; -+ } -+ else -+ { -+ put_u32(buffer, 0); /* Annotations count */ -+ if (b->annotations) -+ hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); -+ ++fx->buffer_count; -+ } - - count = 0; - size = 0; -@@ -754,73 +1451,76 @@ static void write_fx_4_buffer(struct 
hlsl_buffer *b, struct fx_write_context *fx - if (var->buffer != b) - continue; - -- write_fx_4_numeric_variable(var, fx); -+ write_fx_4_numeric_variable(var, shared, fx); - size += get_fx_4_type_size(var->data_type); - ++count; - } - - set_u32(buffer, count_offset, count); - set_u32(buffer, size_offset, align(size, 16)); -- -- fx->numeric_variable_count += count; - } - --static void write_buffers(struct fx_write_context *fx) -+static void write_buffers(struct fx_write_context *fx, bool shared) - { - struct hlsl_buffer *buffer; -- struct hlsl_block block; -- -- hlsl_block_init(&block); -- hlsl_prepend_global_uniform_copy(fx->ctx, &block); -- hlsl_block_init(&block); -- hlsl_calculate_buffer_offsets(fx->ctx); - - LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) - { -- if (!buffer->size) -+ if (!buffer->size && !fx->include_empty_buffers) -+ continue; -+ if (!strcmp(buffer->name, "$Params")) -+ continue; -+ if (fx->child_effect && (shared != !!(buffer->modifiers & HLSL_STORAGE_SHARED))) - continue; - - write_fx_4_buffer(buffer, fx); -- ++fx->buffer_count; - } - } - --static bool is_object_variable(const struct hlsl_ir_var *var) -+static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) - { - const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); - -- if (type->class != HLSL_CLASS_OBJECT) -- return false; -- -- switch (type->base_type) -+ switch (type->class) - { -- case HLSL_TYPE_SAMPLER: -- case HLSL_TYPE_TEXTURE: -- case HLSL_TYPE_UAV: -- case HLSL_TYPE_PIXELSHADER: -- case HLSL_TYPE_VERTEXSHADER: -- case HLSL_TYPE_RENDERTARGETVIEW: -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_TEXTURE: -+ return true; -+ case HLSL_CLASS_UAV: -+ if (ctx->profile->major_version < 5) -+ return false; -+ if (type->e.resource.rasteriser_ordered) -+ return false; -+ return true; -+ case HLSL_CLASS_VERTEX_SHADER: - return true; -+ - default: - return false; - } - } - --static void write_objects(struct fx_write_context *fx) -+static void write_objects(struct fx_write_context *fx, bool shared) - { -+ struct hlsl_ctx *ctx = fx->ctx; - struct hlsl_ir_var *var; -- uint32_t count = 0; - -- LIST_FOR_EACH_ENTRY(var, &fx->ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ if (shared && !fx->child_effect) -+ return; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (!is_object_variable(var)) -+ if (!is_supported_object_variable(ctx, var)) -+ continue; -+ -+ if (fx->child_effect && (shared != !!(var->storage_modifiers & HLSL_STORAGE_SHARED))) - continue; - - write_fx_4_object_variable(var, fx); -- ++count; - } -- -- fx->object_variable_count += count; - } - - static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) -@@ -833,10 +1533,10 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - - put_u32(&fx.unstructured, 0); /* Empty string placeholder. */ - -- write_buffers(&fx); -- write_objects(&fx); -- /* TODO: shared buffers */ -- /* TODO: shared objects */ -+ write_buffers(&fx, false); -+ write_objects(&fx, false); -+ write_buffers(&fx, true); -+ write_objects(&fx, true); - - write_techniques(ctx->globals, &fx); - -@@ -844,20 +1544,20 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, fx.buffer_count); /* Buffer count. 
*/ - put_u32(&buffer, fx.numeric_variable_count); /* Numeric variable count. */ - put_u32(&buffer, fx.object_variable_count); /* Object variable count. */ -- put_u32(&buffer, 0); /* Pool buffer count. */ -- put_u32(&buffer, 0); /* Pool variable count. */ -- put_u32(&buffer, 0); /* Pool object count. */ -+ put_u32(&buffer, fx.shared_buffer_count); -+ put_u32(&buffer, fx.shared_numeric_variable_count); -+ put_u32(&buffer, fx.shared_object_count); - put_u32(&buffer, fx.technique_count); - size_offset = put_u32(&buffer, 0); /* Unstructured size. */ - put_u32(&buffer, 0); /* String count. */ -- put_u32(&buffer, 0); /* Texture object count. */ -+ put_u32(&buffer, fx.texture_count); - put_u32(&buffer, 0); /* Depth stencil state count. */ - put_u32(&buffer, 0); /* Blend state count. */ - put_u32(&buffer, 0); /* Rasterizer state count. */ -- put_u32(&buffer, 0); /* Sampler state count. */ -- put_u32(&buffer, 0); /* Rendertarget view count. */ -- put_u32(&buffer, 0); /* Depth stencil view count. */ -- put_u32(&buffer, 0); /* Shader count. */ -+ put_u32(&buffer, fx.sampler_state_count); -+ put_u32(&buffer, fx.rtv_count); -+ put_u32(&buffer, fx.dsv_count); -+ put_u32(&buffer, fx.shader_count); - put_u32(&buffer, 0); /* Inline shader count. */ - - set_u32(&buffer, size_offset, fx.unstructured.size); -@@ -870,15 +1570,15 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - - set_status(&fx, buffer.status); - -- if (!fx.status) -+ if (fx.status < 0) -+ ctx->result = fx.status; -+ -+ if (!ctx->result) - { - out->code = buffer.data; - out->size = buffer.size; - } - -- if (fx.status < 0) -- ctx->result = fx.status; -- - return fx_write_context_cleanup(&fx); - } - -@@ -892,8 +1592,8 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - - put_u32(&fx.unstructured, 0); /* Empty string placeholder. */ - -- write_buffers(&fx); -- write_objects(&fx); -+ write_buffers(&fx, false); -+ write_objects(&fx, false); - /* TODO: interface variables */ - - write_groups(&fx); -@@ -902,23 +1602,23 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, fx.buffer_count); /* Buffer count. */ - put_u32(&buffer, fx.numeric_variable_count); /* Numeric variable count. */ - put_u32(&buffer, fx.object_variable_count); /* Object variable count. */ -- put_u32(&buffer, 0); /* Pool buffer count. */ -- put_u32(&buffer, 0); /* Pool variable count. */ -- put_u32(&buffer, 0); /* Pool object count. */ -+ put_u32(&buffer, fx.shared_buffer_count); -+ put_u32(&buffer, fx.shared_numeric_variable_count); -+ put_u32(&buffer, fx.shared_object_count); - put_u32(&buffer, fx.technique_count); - size_offset = put_u32(&buffer, 0); /* Unstructured size. */ - put_u32(&buffer, 0); /* String count. */ -- put_u32(&buffer, 0); /* Texture object count. */ -+ put_u32(&buffer, fx.texture_count); - put_u32(&buffer, 0); /* Depth stencil state count. */ - put_u32(&buffer, 0); /* Blend state count. */ - put_u32(&buffer, 0); /* Rasterizer state count. */ -- put_u32(&buffer, 0); /* Sampler state count. */ -- put_u32(&buffer, 0); /* Rendertarget view count. */ -- put_u32(&buffer, 0); /* Depth stencil view count. */ -- put_u32(&buffer, 0); /* Shader count. */ -+ put_u32(&buffer, fx.sampler_state_count); -+ put_u32(&buffer, fx.rtv_count); -+ put_u32(&buffer, fx.dsv_count); -+ put_u32(&buffer, fx.shader_count); - put_u32(&buffer, 0); /* Inline shader count. */ - put_u32(&buffer, fx.group_count); /* Group count. */ -- put_u32(&buffer, 0); /* UAV count. 
*/ -+ put_u32(&buffer, fx.uav_count); - put_u32(&buffer, 0); /* Interface variables count. */ - put_u32(&buffer, 0); /* Interface variable element count. */ - put_u32(&buffer, 0); /* Class instance elements count. */ -@@ -933,15 +1633,15 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - - set_status(&fx, buffer.status); - -- if (!fx.status) -+ if (fx.status < 0) -+ ctx->result = fx.status; -+ -+ if (!ctx->result) - { - out->code = buffer.data; - out->size = buffer.size; - } - -- if (fx.status < 0) -- ctx->result = fx.status; -- - return fx_write_context_cleanup(&fx); - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index bdd03c1e72a..3e482a5fc70 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -20,29 +20,14 @@ - - struct vkd3d_glsl_generator - { -- struct vkd3d_shader_version version; -+ struct vsir_program *program; - struct vkd3d_string_buffer buffer; - struct vkd3d_shader_location location; - struct vkd3d_shader_message_context *message_context; -+ unsigned int indent; - bool failed; - }; - --struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, -- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) --{ -- struct vkd3d_glsl_generator *generator; -- -- if (!(generator = vkd3d_malloc(sizeof(*generator)))) -- return NULL; -- -- memset(generator, 0, sizeof(*generator)); -- generator->version = *version; -- vkd3d_string_buffer_init(&generator->buffer); -- generator->location = *location; -- generator->message_context = message_context; -- return generator; --} -- - static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( - struct vkd3d_glsl_generator *generator, - enum vkd3d_shader_error error, const char *fmt, ...) 
-@@ -55,10 +40,23 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( - generator->failed = true; - } - -+static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) -+{ -+ vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); -+} -+ -+static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ shader_glsl_print_indent(&gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->handler_idx); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); -+} -+ - static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, - const struct vkd3d_shader_instruction *ins) - { -- const struct vkd3d_shader_version *version = &generator->version; -+ const struct vkd3d_shader_version *version = &generator->program->shader_version; - - /* - * TODO: Implement in_subroutine -@@ -66,6 +64,7 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, - */ - if (version->major >= 4) - { -+ shader_glsl_print_indent(&generator->buffer, generator->indent); - vkd3d_string_buffer_printf(&generator->buffer, "return;\n"); - } - } -@@ -73,48 +72,57 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, - static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, - const struct vkd3d_shader_instruction *instruction) - { -+ generator->location = instruction->location; -+ - switch (instruction->handler_idx) - { - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: -+ case VKD3DSIH_NOP: - break; - case VKD3DSIH_RET: - shader_glsl_ret(generator, instruction); - break; - default: -- vkd3d_glsl_compiler_error(generator, -- VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Unhandled instruction %#x", instruction->handler_idx); -+ shader_glsl_unhandled(generator, instruction); - break; - } - } - --int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, -- struct vsir_program *program, struct vkd3d_shader_code *out) -+static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) - { -+ const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; -+ struct vkd3d_string_buffer *buffer = &gen->buffer; - unsigned int i; - void *code; - -- vkd3d_string_buffer_printf(&generator->buffer, "#version 440\n\n"); -- vkd3d_string_buffer_printf(&generator->buffer, "void main()\n{\n"); -+ ERR("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ -+ vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); -+ -+ vkd3d_string_buffer_printf(buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); - -- generator->location.column = 0; -- for (i = 0; i < program->instructions.count; ++i) -+ vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); -+ -+ ++gen->indent; -+ for (i = 0; i < instructions->count; ++i) - { -- generator->location.line = i + 1; -- vkd3d_glsl_handle_instruction(generator, &program->instructions.elements[i]); -+ vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]); - } - -- if (generator->failed) -- return VKD3D_ERROR_INVALID_SHADER; -+ vkd3d_string_buffer_printf(buffer, "}\n"); - -- vkd3d_string_buffer_printf(&generator->buffer, "}\n"); -+ if (TRACE_ON()) -+ vkd3d_string_buffer_trace(buffer); -+ -+ if (gen->failed) -+ return VKD3D_ERROR_INVALID_SHADER; - -- if ((code = vkd3d_malloc(generator->buffer.buffer_size))) -+ if ((code = vkd3d_malloc(buffer->buffer_size))) - { -- memcpy(code, generator->buffer.buffer, generator->buffer.content_size); -- out->size = generator->buffer.content_size; -+ memcpy(code, buffer->buffer, buffer->content_size); -+ out->size = buffer->content_size; - out->code = code; - } - else return VKD3D_ERROR_OUT_OF_MEMORY; -@@ -122,8 +130,33 @@ int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, - return VKD3D_OK; - } - --void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator) -+static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) -+{ -+ vkd3d_string_buffer_cleanup(&gen->buffer); -+} -+ -+static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, -+ struct vsir_program *program, struct vkd3d_shader_message_context *message_context) - { -- vkd3d_string_buffer_cleanup(&generator->buffer); -- vkd3d_free(generator); -+ memset(gen, 0, sizeof(*gen)); -+ gen->program = program; -+ vkd3d_string_buffer_init(&gen->buffer); -+ gen->message_context = message_context; -+} -+ -+int glsl_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_glsl_generator generator; -+ int ret; -+ -+ if ((ret = vsir_program_normalise(program, config_flags, compile_info, message_context)) < 0) -+ return ret; -+ -+ vkd3d_glsl_generator_init(&generator, program, message_context); -+ ret = vkd3d_glsl_generator_generate(&generator, out); -+ vkd3d_glsl_generator_cleanup(&generator); -+ -+ return ret; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 538f083df9c..99214fba6de 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -134,14 +134,43 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) - return hlsl_get_var(scope->upper, name); - } - --void hlsl_free_var(struct hlsl_ir_var *decl) -+static void free_state_block_entry(struct hlsl_state_block_entry *entry) -+{ -+ unsigned int i; -+ -+ vkd3d_free(entry->name); -+ for (i = 0; i < entry->args_count; ++i) -+ hlsl_src_remove(&entry->args[i]); -+ vkd3d_free(entry->args); -+ hlsl_block_cleanup(entry->instrs); -+ vkd3d_free(entry->instrs); -+ vkd3d_free(entry); -+} -+ -+void hlsl_free_state_block(struct hlsl_state_block *state_block) - { - unsigned int k; - -+ assert(state_block); -+ for (k = 0; k < state_block->count; ++k) -+ free_state_block_entry(state_block->entries[k]); -+ vkd3d_free(state_block->entries); -+ vkd3d_free(state_block); -+} -+ -+void hlsl_free_var(struct hlsl_ir_var *decl) -+{ -+ unsigned int k, i; 
-+ - vkd3d_free((void *)decl->name); - hlsl_cleanup_semantic(&decl->semantic); - for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) - vkd3d_free((void *)decl->objects_usage[k]); -+ -+ for (i = 0; i < decl->state_block_count; ++i) -+ hlsl_free_state_block(decl->state_blocks[i]); -+ vkd3d_free(decl->state_blocks); -+ - vkd3d_free(decl); - } - -@@ -201,50 +230,46 @@ unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) - - bool hlsl_type_is_resource(const struct hlsl_type *type) - { -- if (type->class == HLSL_CLASS_ARRAY) -- return hlsl_type_is_resource(type->e.array.type); -- -- if (type->class == HLSL_CLASS_OBJECT) -+ switch (type->class) - { -- switch (type->base_type) -- { -- case HLSL_TYPE_TEXTURE: -- case HLSL_TYPE_SAMPLER: -- case HLSL_TYPE_UAV: -- return true; -- default: -- return false; -- } -+ case HLSL_CLASS_ARRAY: -+ return hlsl_type_is_resource(type->e.array.type); -+ -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_UAV: -+ return true; -+ -+ default: -+ return false; - } -- return false; - } - - /* Only intended to be used for derefs (after copies have been lowered to components or vectors) or - * resources, since for both their data types span across a single regset. */ - static enum hlsl_regset type_get_regset(const struct hlsl_type *type) - { -- if (hlsl_is_numeric_type(type)) -- return HLSL_REGSET_NUMERIC; -+ switch (type->class) -+ { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ return HLSL_REGSET_NUMERIC; - -- if (type->class == HLSL_CLASS_ARRAY) -- return type_get_regset(type->e.array.type); -+ case HLSL_CLASS_ARRAY: -+ return type_get_regset(type->e.array.type); - -- if (type->class == HLSL_CLASS_OBJECT) -- { -- switch (type->base_type) -- { -- case HLSL_TYPE_TEXTURE: -- return HLSL_REGSET_TEXTURES; -+ case HLSL_CLASS_SAMPLER: -+ return HLSL_REGSET_SAMPLERS; - -- case HLSL_TYPE_SAMPLER: -- return HLSL_REGSET_SAMPLERS; -+ case HLSL_CLASS_TEXTURE: -+ return HLSL_REGSET_TEXTURES; - -- case HLSL_TYPE_UAV: -- return HLSL_REGSET_UAVS; -+ case HLSL_CLASS_UAV: -+ return HLSL_REGSET_UAVS; - -- default: -- vkd3d_unreachable(); -- } -+ default: -+ break; - } - - vkd3d_unreachable(); -@@ -330,16 +355,28 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - break; - } - -- case HLSL_CLASS_OBJECT: -- { -- if (hlsl_type_is_resource(type)) -- { -- enum hlsl_regset regset = type_get_regset(type); -+ case HLSL_CLASS_SAMPLER: -+ type->reg_size[HLSL_REGSET_SAMPLERS] = 1; -+ break; - -- type->reg_size[regset] = 1; -- } -+ case HLSL_CLASS_TEXTURE: -+ type->reg_size[HLSL_REGSET_TEXTURES] = 1; -+ break; -+ -+ case HLSL_CLASS_UAV: -+ type->reg_size[HLSL_REGSET_UAVS] = 1; -+ break; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_VOID: - break; -- } - } - } - -@@ -352,6 +389,25 @@ unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, - return type->reg_size[regset]; - } - -+static struct hlsl_type *hlsl_new_simple_type(struct hlsl_ctx *ctx, const char *name, enum hlsl_type_class class) -+{ -+ struct hlsl_type *type; -+ -+ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) -+ return NULL; -+ if (!(type->name = hlsl_strdup(ctx, name))) -+ { -+ vkd3d_free(type); -+ return NULL; -+ } -+ type->class = class; -+ hlsl_type_calculate_reg_size(ctx, 
type); -+ -+ list_add_tail(&ctx->types, &type->entry); -+ -+ return type; -+} -+ - static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, enum hlsl_type_class type_class, - enum hlsl_base_type base_type, unsigned dimx, unsigned dimy) - { -@@ -365,7 +421,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e - return NULL; - } - type->class = type_class; -- type->base_type = base_type; -+ type->e.numeric.type = base_type; - type->dimx = dimx; - type->dimy = dimy; - hlsl_type_calculate_reg_size(ctx, type); -@@ -377,7 +433,32 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e - - static bool type_is_single_component(const struct hlsl_type *type) - { -- return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_OBJECT; -+ switch (type->class) -+ { -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: -+ return true; -+ -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ case HLSL_CLASS_STRUCT: -+ case HLSL_CLASS_ARRAY: -+ return false; -+ -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_VOID: -+ break; -+ } -+ vkd3d_unreachable(); - } - - /* Given a type and a component index, this function moves one step through the path required to -@@ -400,7 +481,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - { - case HLSL_CLASS_VECTOR: - assert(index < type->dimx); -- *type_ptr = hlsl_get_scalar_type(ctx, type->base_type); -+ *type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type); - *index_ptr = 0; - return index; - -@@ -410,7 +491,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - bool row_major = hlsl_type_is_row_major(type); - - assert(index < type->dimx * type->dimy); -- *type_ptr = hlsl_get_vector_type(ctx, type->base_type, row_major ? type->dimx : type->dimy); -+ *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy); - *index_ptr = row_major ? x : y; - return row_major ? 
y : x; - } -@@ -496,11 +577,21 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - } - break; - -- case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: - assert(idx == 0); - break; - -- default: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_VOID: - vkd3d_unreachable(); - } - type = next_type; -@@ -674,13 +765,13 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co - switch (type->class) - { - case HLSL_CLASS_VECTOR: -- return hlsl_get_scalar_type(ctx, type->base_type); -+ return hlsl_get_scalar_type(ctx, type->e.numeric.type); - - case HLSL_CLASS_MATRIX: - if (hlsl_type_is_row_major(type)) -- return hlsl_get_vector_type(ctx, type->base_type, type->dimx); -+ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); - else -- return hlsl_get_vector_type(ctx, type->base_type, type->dimy); -+ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimy); - - case HLSL_CLASS_ARRAY: - return type->e.array.type; -@@ -727,7 +818,6 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, - if (!(type = hlsl_alloc(ctx, sizeof(*type)))) - return NULL; - type->class = HLSL_CLASS_STRUCT; -- type->base_type = HLSL_TYPE_VOID; - type->name = name; - type->dimy = 1; - type->e.record.fields = fields; -@@ -746,8 +836,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ - - if (!(type = hlsl_alloc(ctx, sizeof(*type)))) - return NULL; -- type->class = HLSL_CLASS_OBJECT; -- type->base_type = HLSL_TYPE_TEXTURE; -+ type->class = HLSL_CLASS_TEXTURE; - type->dimx = 4; - type->dimy = 1; - type->sampler_dim = dim; -@@ -765,8 +854,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim - - if (!(type = hlsl_alloc(ctx, sizeof(*type)))) - return NULL; -- type->class = HLSL_CLASS_OBJECT; -- type->base_type = HLSL_TYPE_UAV; -+ type->class = HLSL_CLASS_UAV; - type->dimx = format->dimx; - type->dimy = 1; - type->sampler_dim = dim; -@@ -784,7 +872,10 @@ static const char * get_case_insensitive_typename(const char *name) - "dword", - "float", - "matrix", -+ "pixelshader", -+ "texture", - "vector", -+ "vertexshader", - }; - unsigned int i; - -@@ -865,12 +956,24 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - case HLSL_CLASS_ARRAY: - return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; - -- case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: - return 1; - -- default: -- vkd3d_unreachable(); -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_VOID: -+ break; - } -+ -+ vkd3d_unreachable(); - } - - bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2) -@@ -880,56 +983,73 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - - if (t1->class != t2->class) - return false; -- if (t1->base_type != t2->base_type) -- return false; -- if (t1->base_type == HLSL_TYPE_SAMPLER || t1->base_type == 
HLSL_TYPE_TEXTURE -- || t1->base_type == HLSL_TYPE_UAV) -- { -- if (t1->sampler_dim != t2->sampler_dim) -- return false; -- if ((t1->base_type == HLSL_TYPE_TEXTURE || t1->base_type == HLSL_TYPE_UAV) -- && t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC -- && !hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format)) -- return false; -- if (t1->base_type == HLSL_TYPE_UAV && t1->e.resource.rasteriser_ordered != t2->e.resource.rasteriser_ordered) -- return false; -- } -- if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) -- != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) -- return false; -- if (t1->dimx != t2->dimx) -- return false; -- if (t1->dimy != t2->dimy) -- return false; -- if (t1->class == HLSL_CLASS_STRUCT) -- { -- size_t i; -- -- if (t1->e.record.field_count != t2->e.record.field_count) -- return false; - -- for (i = 0; i < t1->e.record.field_count; ++i) -- { -- const struct hlsl_struct_field *field1 = &t1->e.record.fields[i]; -- const struct hlsl_struct_field *field2 = &t2->e.record.fields[i]; -+ switch (t1->class) -+ { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ if (t1->e.numeric.type != t2->e.numeric.type) -+ return false; -+ if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) -+ != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) -+ return false; -+ if (t1->dimx != t2->dimx) -+ return false; -+ if (t1->dimy != t2->dimy) -+ return false; -+ return true; - -- if (!hlsl_types_are_equal(field1->type, field2->type)) -+ case HLSL_CLASS_UAV: -+ if (t1->e.resource.rasteriser_ordered != t2->e.resource.rasteriser_ordered) -+ return false; -+ /* fall through */ -+ case HLSL_CLASS_TEXTURE: -+ if (t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC -+ && !hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format)) - return false; -+ /* fall through */ -+ case HLSL_CLASS_SAMPLER: -+ if (t1->sampler_dim != t2->sampler_dim) -+ return false; -+ return true; - -- if (strcmp(field1->name, field2->name)) -+ case HLSL_CLASS_STRUCT: -+ if (t1->e.record.field_count != t2->e.record.field_count) - return false; -- } -- } -- if (t1->class == HLSL_CLASS_ARRAY) -- return t1->e.array.elements_count == t2->e.array.elements_count -- && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); -- if (t1->class == HLSL_CLASS_OBJECT) -- { -- if (t1->base_type == HLSL_TYPE_TECHNIQUE && t1->e.version != t2->e.version) -- return false; -+ -+ for (size_t i = 0; i < t1->e.record.field_count; ++i) -+ { -+ const struct hlsl_struct_field *field1 = &t1->e.record.fields[i]; -+ const struct hlsl_struct_field *field2 = &t2->e.record.fields[i]; -+ -+ if (!hlsl_types_are_equal(field1->type, field2->type)) -+ return false; -+ -+ if (strcmp(field1->name, field2->name)) -+ return false; -+ } -+ return true; -+ -+ case HLSL_CLASS_ARRAY: -+ return t1->e.array.elements_count == t2->e.array.elements_count -+ && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); -+ -+ case HLSL_CLASS_TECHNIQUE: -+ return t1->e.version == t2->e.version; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_VOID: -+ return true; - } - -- return true; -+ vkd3d_unreachable(); - } - - struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, -@@ -950,7 +1070,6 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, - } - } - type->class = old->class; -- type->base_type = 
old->base_type; - type->dimx = old->dimx; - type->dimy = old->dimy; - type->modifiers = old->modifiers | modifiers; -@@ -962,6 +1081,12 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, - - switch (old->class) - { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ type->e.numeric.type = old->e.numeric.type; -+ break; -+ - case HLSL_CLASS_ARRAY: - if (!(type->e.array.type = hlsl_type_clone(ctx, old->e.array.type, default_majority, modifiers))) - { -@@ -1008,14 +1133,15 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, - break; - } - -- case HLSL_CLASS_OBJECT: -- if (type->base_type == HLSL_TYPE_TECHNIQUE) -- type->e.version = old->e.version; -- if (old->base_type == HLSL_TYPE_TEXTURE || old->base_type == HLSL_TYPE_UAV) -- { -- type->e.resource.format = old->e.resource.format; -- type->e.resource.rasteriser_ordered = old->e.resource.rasteriser_ordered; -- } -+ case HLSL_CLASS_UAV: -+ type->e.resource.rasteriser_ordered = old->e.resource.rasteriser_ordered; -+ /* fall through */ -+ case HLSL_CLASS_TEXTURE: -+ type->e.resource.format = old->e.resource.format; -+ break; -+ -+ case HLSL_CLASS_TECHNIQUE: -+ type->e.version = old->e.version; - break; - - default: -@@ -1346,6 +1472,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp - return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); - } - -+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, -+ struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; -+ -+ assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); -+ assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); -+ return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); -+} -+ - struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) - { -@@ -1538,16 +1674,38 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned - - if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) - return NULL; -+ assert(hlsl_is_numeric_type(val->data_type)); - if (components == 1) -- type = hlsl_get_scalar_type(ctx, val->data_type->base_type); -+ type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); - else -- type = hlsl_get_vector_type(ctx, val->data_type->base_type, components); -+ type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, components); - init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); - hlsl_src_from_node(&swizzle->val, val); - swizzle->swizzle = s; - return &swizzle->node; - } - -+struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, -+ struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_stateblock_constant *constant; -+ struct hlsl_type *type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT); -+ -+ if (!(constant = hlsl_alloc(ctx, sizeof(*constant)))) -+ return NULL; -+ -+ init_node(&constant->node, HLSL_IR_STATEBLOCK_CONSTANT, type, loc); -+ -+ if (!(constant->name = hlsl_alloc(ctx, strlen(name) + 1))) -+ { -+ vkd3d_free(constant); -+ return NULL; -+ } -+ strcpy(constant->name, name); -+ -+ return &constant->node; -+} -+ - bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) - { - struct hlsl_type *type = index->val.node->data_type; -@@ -1557,7 +1715,9 
@@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) - - bool hlsl_index_is_resource_access(struct hlsl_ir_index *index) - { -- return index->val.node->data_type->class == HLSL_CLASS_OBJECT; -+ const struct hlsl_type *type = index->val.node->data_type; -+ -+ return type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV; - } - - bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index) -@@ -1578,10 +1738,10 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v - if (!(index = hlsl_alloc(ctx, sizeof(*index)))) - return NULL; - -- if (type->class == HLSL_CLASS_OBJECT) -+ if (type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV) - type = type->e.resource.format; - else if (type->class == HLSL_CLASS_MATRIX) -- type = hlsl_get_vector_type(ctx, type->base_type, type->dimx); -+ type = hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); - else - type = hlsl_get_element_type_from_path_index(ctx, type, idx); - -@@ -1868,6 +2028,12 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr - return dst; - } - -+static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, -+ struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant) -+{ -+ return hlsl_new_stateblock_constant(ctx, constant->name, &constant->node.loc); -+} -+ - void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c) - { - hlsl_block_cleanup(&c->body); -@@ -1963,6 +2129,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - - case HLSL_IR_SWIZZLE: - return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); -+ -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); - } - - vkd3d_unreachable(); -@@ -2018,7 +2187,8 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, - } - - struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, -- const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) -+ uint32_t modifiers, const struct hlsl_reg_reservation *reservation, struct hlsl_scope *annotations, -+ const struct vkd3d_shader_location *loc) - { - struct hlsl_buffer *buffer; - -@@ -2026,8 +2196,10 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type - return NULL; - buffer->type = type; - buffer->name = name; -+ buffer->modifiers = modifiers; - if (reservation) - buffer->reservation = *reservation; -+ buffer->annotations = annotations; - buffer->loc = *loc; - list_add_tail(&ctx->buffers, &buffer->entry); - return buffer; -@@ -2130,6 +2302,19 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - [HLSL_TYPE_BOOL] = "bool", - }; - -+ static const char *const dimensions[] = -+ { -+ [HLSL_SAMPLER_DIM_1D] = "1D", -+ [HLSL_SAMPLER_DIM_2D] = "2D", -+ [HLSL_SAMPLER_DIM_3D] = "3D", -+ [HLSL_SAMPLER_DIM_CUBE] = "Cube", -+ [HLSL_SAMPLER_DIM_1DARRAY] = "1DArray", -+ [HLSL_SAMPLER_DIM_2DARRAY] = "2DArray", -+ [HLSL_SAMPLER_DIM_2DMS] = "2DMS", -+ [HLSL_SAMPLER_DIM_2DMSARRAY] = "2DMSArray", -+ [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", -+ }; -+ - if (!(string = hlsl_get_string_buffer(ctx))) - return NULL; - -@@ -2142,18 +2327,18 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - switch (type->class) - { - case HLSL_CLASS_SCALAR: -- assert(type->base_type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(string, "%s", 
base_types[type->base_type]); -+ assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]); - return string; - - case HLSL_CLASS_VECTOR: -- assert(type->base_type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(string, "%s%u", base_types[type->base_type], type->dimx); -+ assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); - return string; - - case HLSL_CLASS_MATRIX: -- assert(type->base_type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->base_type], type->dimy, type->dimx); -+ assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); - return string; - - case HLSL_CLASS_ARRAY: -@@ -2183,71 +2368,60 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - vkd3d_string_buffer_printf(string, ""); - return string; - -- case HLSL_CLASS_OBJECT: -- { -- static const char *const dimensions[] = -+ case HLSL_CLASS_TEXTURE: -+ if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) - { -- [HLSL_SAMPLER_DIM_1D] = "1D", -- [HLSL_SAMPLER_DIM_2D] = "2D", -- [HLSL_SAMPLER_DIM_3D] = "3D", -- [HLSL_SAMPLER_DIM_CUBE] = "Cube", -- [HLSL_SAMPLER_DIM_1DARRAY] = "1DArray", -- [HLSL_SAMPLER_DIM_2DARRAY] = "2DArray", -- [HLSL_SAMPLER_DIM_2DMS] = "2DMS", -- [HLSL_SAMPLER_DIM_2DMSARRAY] = "2DMSArray", -- [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", -- }; -- -- switch (type->base_type) -- { -- case HLSL_TYPE_TEXTURE: -- if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) -- { -- vkd3d_string_buffer_printf(string, "Texture"); -- return string; -- } -- -- assert(type->e.resource.format->base_type < ARRAY_SIZE(base_types)); -- if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) -- { -- vkd3d_string_buffer_printf(string, "Buffer"); -- } -- else -- { -- assert(type->sampler_dim < ARRAY_SIZE(dimensions)); -- vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); -- } -- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) -- { -- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -- hlsl_release_string_buffer(ctx, inner_string); -- } -- return string; -- -- case HLSL_TYPE_UAV: -- if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) -- vkd3d_string_buffer_printf(string, "RWBuffer"); -- else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -- vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); -- else -- vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); -- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) -- { -- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -- hlsl_release_string_buffer(ctx, inner_string); -- } -- return string; -+ vkd3d_string_buffer_printf(string, "Texture"); -+ return string; -+ } - -- default: -- vkd3d_string_buffer_printf(string, ""); -- return string; -+ assert(hlsl_is_numeric_type(type->e.resource.format)); -+ assert(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); -+ if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) -+ { -+ vkd3d_string_buffer_printf(string, "Buffer"); - } -- } -+ else -+ { -+ assert(type->sampler_dim < ARRAY_SIZE(dimensions)); -+ vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); -+ } -+ if ((inner_string = hlsl_type_to_string(ctx, 
type->e.resource.format))) -+ { -+ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -+ hlsl_release_string_buffer(ctx, inner_string); -+ } -+ return string; - -- default: -- vkd3d_string_buffer_printf(string, ""); -+ case HLSL_CLASS_UAV: -+ if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) -+ vkd3d_string_buffer_printf(string, "RWBuffer"); -+ else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -+ vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); -+ else -+ vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); -+ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) -+ { -+ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -+ hlsl_release_string_buffer(ctx, inner_string); -+ } - return string; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_VOID: -+ break; - } -+ -+ vkd3d_string_buffer_printf(string, ""); -+ return string; - } - - struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, -@@ -2525,7 +2699,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl - { - const union hlsl_constant_value_component *value = &constant->value.u[x]; - -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - vkd3d_string_buffer_printf(buffer, "%s ", value->u ? "true" : "false"); -@@ -2611,10 +2785,10 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP2_MUL] = "*", - [HLSL_OP2_NEQUAL] = "!=", - [HLSL_OP2_RSHIFT] = ">>", -+ [HLSL_OP2_SLT] = "slt", - - [HLSL_OP3_CMP] = "cmp", - [HLSL_OP3_DP2ADD] = "dp2add", -- [HLSL_OP3_MOVC] = "movc", - [HLSL_OP3_TERNARY] = "ternary", - }; - -@@ -2791,6 +2965,12 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ - vkd3d_string_buffer_printf(buffer, "]"); - } - -+static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer, -+ const struct hlsl_ir_stateblock_constant *constant) -+{ -+ vkd3d_string_buffer_printf(buffer, "%s", constant->name); -+} -+ - static void dump_ir_switch(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_switch *s) - { - struct hlsl_ir_switch_case *c; -@@ -2879,6 +3059,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - case HLSL_IR_SWIZZLE: - dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); - break; -+ -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); -+ break; - } - } - -@@ -3051,6 +3235,12 @@ static void free_ir_index(struct hlsl_ir_index *index) - vkd3d_free(index); - } - -+static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant) -+{ -+ vkd3d_free(constant->name); -+ vkd3d_free(constant); -+} -+ - void hlsl_free_instr(struct hlsl_ir_node *node) - { - assert(list_empty(&node->uses)); -@@ -3108,6 +3298,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) - case HLSL_IR_SWITCH: - free_ir_switch(hlsl_ir_switch(node)); - break; -+ -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); -+ break; - } - } - -@@ -3273,7 +3467,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) - {"cs_4_0", 
VKD3D_SHADER_TYPE_COMPUTE, 4, 0, 0, 0, false}, - {"cs_4_1", VKD3D_SHADER_TYPE_COMPUTE, 4, 1, 0, 0, false}, - {"cs_5_0", VKD3D_SHADER_TYPE_COMPUTE, 5, 0, 0, 0, false}, -+ {"cs_5_1", VKD3D_SHADER_TYPE_COMPUTE, 5, 1, 0, 0, false}, - {"ds_5_0", VKD3D_SHADER_TYPE_DOMAIN, 5, 0, 0, 0, false}, -+ {"ds_5_1", VKD3D_SHADER_TYPE_DOMAIN, 5, 1, 0, 0, false}, - {"fx_2_0", VKD3D_SHADER_TYPE_EFFECT, 2, 0, 0, 0, false}, - {"fx_4_0", VKD3D_SHADER_TYPE_EFFECT, 4, 0, 0, 0, false}, - {"fx_4_1", VKD3D_SHADER_TYPE_EFFECT, 4, 1, 0, 0, false}, -@@ -3281,7 +3477,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) - {"gs_4_0", VKD3D_SHADER_TYPE_GEOMETRY, 4, 0, 0, 0, false}, - {"gs_4_1", VKD3D_SHADER_TYPE_GEOMETRY, 4, 1, 0, 0, false}, - {"gs_5_0", VKD3D_SHADER_TYPE_GEOMETRY, 5, 0, 0, 0, false}, -+ {"gs_5_1", VKD3D_SHADER_TYPE_GEOMETRY, 5, 1, 0, 0, false}, - {"hs_5_0", VKD3D_SHADER_TYPE_HULL, 5, 0, 0, 0, false}, -+ {"hs_5_1", VKD3D_SHADER_TYPE_HULL, 5, 1, 0, 0, false}, - {"ps.1.0", VKD3D_SHADER_TYPE_PIXEL, 1, 0, 0, 0, false}, - {"ps.1.1", VKD3D_SHADER_TYPE_PIXEL, 1, 1, 0, 0, false}, - {"ps.1.2", VKD3D_SHADER_TYPE_PIXEL, 1, 2, 0, 0, false}, -@@ -3309,6 +3507,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) - {"ps_4_0_level_9_3", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 9, 3, false}, - {"ps_4_1", VKD3D_SHADER_TYPE_PIXEL, 4, 1, 0, 0, false}, - {"ps_5_0", VKD3D_SHADER_TYPE_PIXEL, 5, 0, 0, 0, false}, -+ {"ps_5_1", VKD3D_SHADER_TYPE_PIXEL, 5, 1, 0, 0, false}, - {"tx_1_0", VKD3D_SHADER_TYPE_TEXTURE, 1, 0, 0, 0, false}, - {"vs.1.0", VKD3D_SHADER_TYPE_VERTEX, 1, 0, 0, 0, false}, - {"vs.1.1", VKD3D_SHADER_TYPE_VERTEX, 1, 1, 0, 0, false}, -@@ -3330,6 +3529,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) - {"vs_4_0_level_9_3", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 9, 3, false}, - {"vs_4_1", VKD3D_SHADER_TYPE_VERTEX, 4, 1, 0, 0, false}, - {"vs_5_0", VKD3D_SHADER_TYPE_VERTEX, 5, 0, 0, 0, false}, -+ {"vs_5_1", VKD3D_SHADER_TYPE_VERTEX, 5, 1, 0, 0, false}, - }; - - for (i = 0; i < ARRAY_SIZE(profiles); ++i) -@@ -3351,6 +3551,7 @@ static int compare_function_rb(const void *key, const struct rb_entry *entry) - - static void declare_predefined_types(struct hlsl_ctx *ctx) - { -+ struct vkd3d_string_buffer *name; - unsigned int x, y, bt, i, v; - struct hlsl_type *type; - -@@ -3363,7 +3564,6 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - "uint", - "bool", - }; -- char name[15]; - - static const char *const variants_float[] = {"min10float", "min16float"}; - static const char *const variants_int[] = {"min12int", "min16int"}; -@@ -3391,14 +3591,6 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - {"dword", HLSL_CLASS_SCALAR, HLSL_TYPE_UINT, 1, 1}, - {"vector", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, - {"matrix", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, -- {"fxgroup", HLSL_CLASS_OBJECT, HLSL_TYPE_EFFECT_GROUP, 1, 1}, -- {"pass", HLSL_CLASS_OBJECT, HLSL_TYPE_PASS, 1, 1}, -- {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, -- {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, -- {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, -- {"VERTEXSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, -- {"RenderTargetView",HLSL_CLASS_OBJECT, HLSL_TYPE_RENDERTARGETVIEW, 1, 1}, -- {"DepthStencilView",HLSL_CLASS_OBJECT, HLSL_TYPE_DEPTHSTENCILVIEW, 1, 1}, - }; - - static const struct -@@ -3413,28 +3605,34 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - {"technique11", 11}, - }; - -+ if (!(name = 
hlsl_get_string_buffer(ctx))) -+ return; -+ - for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) - { - for (y = 1; y <= 4; ++y) - { - for (x = 1; x <= 4; ++x) - { -- sprintf(name, "%s%ux%u", names[bt], y, x); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_MATRIX, bt, x, y); -+ vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s%ux%u", names[bt], y, x); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_MATRIX, bt, x, y); - hlsl_scope_add_type(ctx->globals, type); - ctx->builtin_types.matrix[bt][x - 1][y - 1] = type; - - if (y == 1) - { -- sprintf(name, "%s%u", names[bt], x); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_VECTOR, bt, x, y); -+ vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s%u", names[bt], x); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_VECTOR, bt, x, y); - hlsl_scope_add_type(ctx->globals, type); - ctx->builtin_types.vector[bt][x - 1] = type; - - if (x == 1) - { -- sprintf(name, "%s", names[bt]); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_SCALAR, bt, x, y); -+ vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s", names[bt]); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_SCALAR, bt, x, y); - hlsl_scope_add_type(ctx->globals, type); - ctx->builtin_types.scalar[bt] = type; - } -@@ -3477,22 +3675,25 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - { - for (x = 1; x <= 4; ++x) - { -- sprintf(name, "%s%ux%u", variants[v], y, x); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_MATRIX, bt, x, y); -+ vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s%ux%u", variants[v], y, x); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_MATRIX, bt, x, y); - type->is_minimum_precision = 1; - hlsl_scope_add_type(ctx->globals, type); - - if (y == 1) - { -- sprintf(name, "%s%u", variants[v], x); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_VECTOR, bt, x, y); -+ vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s%u", variants[v], x); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_VECTOR, bt, x, y); - type->is_minimum_precision = 1; - hlsl_scope_add_type(ctx->globals, type); - - if (x == 1) - { -- sprintf(name, "%s", variants[v]); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_SCALAR, bt, x, y); -+ vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s", variants[v]); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_SCALAR, bt, x, y); - type->is_minimum_precision = 1; - hlsl_scope_add_type(ctx->globals, type); - } -@@ -3504,12 +3705,20 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - - for (bt = 0; bt <= HLSL_SAMPLER_DIM_LAST_SAMPLER; ++bt) - { -- type = hlsl_new_type(ctx, sampler_names[bt], HLSL_CLASS_OBJECT, HLSL_TYPE_SAMPLER, 1, 1); -+ type = hlsl_new_simple_type(ctx, sampler_names[bt], HLSL_CLASS_SAMPLER); - type->sampler_dim = bt; - ctx->builtin_types.sampler[bt] = type; - } - -- ctx->builtin_types.Void = hlsl_new_type(ctx, "void", HLSL_CLASS_OBJECT, HLSL_TYPE_VOID, 1, 1); -+ ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); -+ hlsl_scope_add_type(ctx->globals, 
hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "vertexshader", HLSL_CLASS_VERTEX_SHADER)); - - for (i = 0; i < ARRAY_SIZE(effect_types); ++i) - { -@@ -3520,10 +3729,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - - for (i = 0; i < ARRAY_SIZE(technique_types); ++i) - { -- type = hlsl_new_type(ctx, technique_types[i].name, HLSL_CLASS_OBJECT, HLSL_TYPE_TECHNIQUE, 1, 1); -+ type = hlsl_new_simple_type(ctx, technique_types[i].name, HLSL_CLASS_TECHNIQUE); - type->e.version = technique_types[i].version; - hlsl_scope_add_type(ctx->globals, type); - } -+ -+ hlsl_release_string_buffer(ctx, name); - } - - static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, -@@ -3571,27 +3782,46 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil - list_init(&ctx->buffers); - - if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, -- hlsl_strdup(ctx, "$Globals"), NULL, &ctx->location))) -+ hlsl_strdup(ctx, "$Globals"), 0, NULL, NULL, &ctx->location))) - return false; - if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, -- hlsl_strdup(ctx, "$Params"), NULL, &ctx->location))) -+ hlsl_strdup(ctx, "$Params"), 0, NULL, NULL, &ctx->location))) - return false; - ctx->cur_buffer = ctx->globals_buffer; - -+ ctx->warn_implicit_truncation = true; -+ - for (i = 0; i < compile_info->option_count; ++i) - { - const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; - -- if (option->name == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER) -+ switch (option->name) - { -- if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) -- ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; -- else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) -- ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; -- } -- else if (option->name == VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY) -- { -- ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; -+ case VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER: -+ if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) -+ ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; -+ else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) -+ ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; -+ break; -+ -+ case VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY: -+ ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; -+ break; -+ -+ case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT: -+ ctx->child_effect = option->value; -+ break; -+ -+ case VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION: -+ ctx->warn_implicit_truncation = option->value; -+ break; -+ -+ case VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS: -+ ctx->include_empty_buffers = option->value; -+ break; -+ -+ default: -+ break; - } - } - -@@ -3615,6 +3845,21 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - - rb_destroy(&ctx->functions, free_function_rb, NULL); - -+ /* State blocks must be free before the variables, because they contain instructions that may -+ * refer to them. 
*/ -+ LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ for (i = 0; i < var->state_block_count; ++i) -+ hlsl_free_state_block(var->state_blocks[i]); -+ vkd3d_free(var->state_blocks); -+ var->state_blocks = NULL; -+ var->state_block_count = 0; -+ var->state_block_capacity = 0; -+ } -+ } -+ - LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) - { - LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) -@@ -3638,6 +3883,7 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { -+ enum vkd3d_shader_target_type target_type = compile_info->target_type; - const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; - struct hlsl_ir_function_decl *decl, *entry_func = NULL; - const struct hlsl_profile_info *profile; -@@ -3659,25 +3905,25 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d - return VKD3D_ERROR_NOT_IMPLEMENTED; - } - -- if (compile_info->target_type != VKD3D_SHADER_TARGET_FX && profile->type == VKD3D_SHADER_TYPE_EFFECT) -+ if (target_type != VKD3D_SHADER_TARGET_FX && profile->type == VKD3D_SHADER_TYPE_EFFECT) - { - vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, - "The '%s' target profile is only compatible with the 'fx' target type.", profile->name); - return VKD3D_ERROR_INVALID_ARGUMENT; - } -- else if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_BYTECODE && profile->major_version > 3) -+ else if (target_type == VKD3D_SHADER_TARGET_D3D_BYTECODE && profile->major_version > 3) - { - vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, - "The '%s' target profile is incompatible with the 'd3dbc' target type.", profile->name); - return VKD3D_ERROR_INVALID_ARGUMENT; - } -- else if (compile_info->target_type == VKD3D_SHADER_TARGET_DXBC_TPF && profile->major_version < 4) -+ else if (target_type == VKD3D_SHADER_TARGET_DXBC_TPF && profile->major_version < 4) - { - vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, - "The '%s' target profile is incompatible with the 'dxbc-tpf' target type.", profile->name); - return VKD3D_ERROR_INVALID_ARGUMENT; - } -- else if (compile_info->target_type == VKD3D_SHADER_TARGET_FX && profile->type != VKD3D_SHADER_TYPE_EFFECT) -+ else if (target_type == VKD3D_SHADER_TARGET_FX && profile->type != VKD3D_SHADER_TYPE_EFFECT) - { - vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, - "The '%s' target profile is incompatible with the 'fx' target type.", profile->name); -@@ -3741,8 +3987,41 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d - return VKD3D_ERROR_INVALID_SHADER; - } - -- ret = hlsl_emit_bytecode(&ctx, entry_func, compile_info->target_type, out); -+ if (target_type == VKD3D_SHADER_TARGET_SPIRV_BINARY -+ || target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT -+ || target_type == VKD3D_SHADER_TARGET_D3D_ASM) -+ { -+ uint64_t config_flags = vkd3d_shader_init_config_flags(); -+ struct vkd3d_shader_compile_info info = *compile_info; -+ struct vsir_program program; -+ -+ if (profile->major_version < 4) -+ { -+ if ((ret = hlsl_emit_bytecode(&ctx, 
entry_func, VKD3D_SHADER_TARGET_D3D_BYTECODE, &info.source)) < 0) -+ goto done; -+ info.source_type = VKD3D_SHADER_SOURCE_D3D_BYTECODE; -+ ret = d3dbc_parse(&info, config_flags, message_context, &program); -+ } -+ else -+ { -+ if ((ret = hlsl_emit_bytecode(&ctx, entry_func, VKD3D_SHADER_TARGET_DXBC_TPF, &info.source)) < 0) -+ goto done; -+ info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; -+ ret = tpf_parse(&info, config_flags, message_context, &program); -+ } -+ if (ret >= 0) -+ { -+ ret = vsir_program_compile(&program, config_flags, &info, out, message_context); -+ vsir_program_cleanup(&program); -+ } -+ vkd3d_shader_free_shader_code(&info.source); -+ } -+ else -+ { -+ ret = hlsl_emit_bytecode(&ctx, entry_func, target_type, out); -+ } - -+done: - hlsl_ctx_cleanup(&ctx); - return ret; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index df0a53b20de..27814f3a56f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -78,7 +78,18 @@ enum hlsl_type_class - HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX, - HLSL_CLASS_STRUCT, - HLSL_CLASS_ARRAY, -- HLSL_CLASS_OBJECT, -+ HLSL_CLASS_DEPTH_STENCIL_VIEW, -+ HLSL_CLASS_EFFECT_GROUP, -+ HLSL_CLASS_PASS, -+ HLSL_CLASS_PIXEL_SHADER, -+ HLSL_CLASS_RENDER_TARGET_VIEW, -+ HLSL_CLASS_SAMPLER, -+ HLSL_CLASS_STRING, -+ HLSL_CLASS_TECHNIQUE, -+ HLSL_CLASS_TEXTURE, -+ HLSL_CLASS_UAV, -+ HLSL_CLASS_VERTEX_SHADER, -+ HLSL_CLASS_VOID, - }; - - enum hlsl_base_type -@@ -90,18 +101,6 @@ enum hlsl_base_type - HLSL_TYPE_UINT, - HLSL_TYPE_BOOL, - HLSL_TYPE_LAST_SCALAR = HLSL_TYPE_BOOL, -- HLSL_TYPE_SAMPLER, -- HLSL_TYPE_TEXTURE, -- HLSL_TYPE_UAV, -- HLSL_TYPE_PIXELSHADER, -- HLSL_TYPE_VERTEXSHADER, -- HLSL_TYPE_PASS, -- HLSL_TYPE_RENDERTARGETVIEW, -- HLSL_TYPE_DEPTHSTENCILVIEW, -- HLSL_TYPE_TECHNIQUE, -- HLSL_TYPE_EFFECT_GROUP, -- HLSL_TYPE_STRING, -- HLSL_TYPE_VOID, - }; - - enum hlsl_sampler_dim -@@ -143,17 +142,11 @@ struct hlsl_type - struct rb_entry scope_entry; - - enum hlsl_type_class class; -- /* If class is <= HLSL_CLASS_LAST_NUMERIC, then base_type is <= HLSL_TYPE_LAST_SCALAR. -- * If class is HLSL_CLASS_OBJECT, then base_type is > HLSL_TYPE_LAST_SCALAR. -- * If class is HLSL_CLASS_OBJECT and base_type is HLSL_TYPE_TECHNIQUE, additional version -- * field is used to distinguish between technique types. -- * Otherwise, base_type is not used. */ -- enum hlsl_base_type base_type; -- -- /* If base_type is HLSL_TYPE_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. -- * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim can be any value of the enum except -+ -+ /* If class is HLSL_CLASS_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. -+ * If class is HLSL_CLASS_TEXTURE, then sampler_dim can be any value of the enum except - * HLSL_SAMPLER_DIM_GENERIC and HLSL_SAMPLER_DIM_COMPARISON. -- * If base_type is HLSL_TYPE_UAV, then sampler_dim must be one of HLSL_SAMPLER_DIM_1D, -+ * If class is HLSL_CLASS_UAV, then sampler_dim must be one of HLSL_SAMPLER_DIM_1D, - * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, HLSL_SAMPLER_DIM_2DARRAY, - * HLSL_SAMPLER_DIM_BUFFER, or HLSL_SAMPLER_DIM_STRUCTURED_BUFFER. - * Otherwise, sampler_dim is not used */ -@@ -171,11 +164,7 @@ struct hlsl_type - * If type is HLSL_CLASS_MATRIX, then dimx is the number of columns, and dimy the number of rows. - * If type is HLSL_CLASS_ARRAY, then dimx and dimy have the same value as in the type of the array elements. 
- * If type is HLSL_CLASS_STRUCT, then dimx is the sum of (dimx * dimy) of every component, and dimy = 1. -- * If type is HLSL_CLASS_OBJECT, dimx and dimy depend on the base_type: -- * If base_type is HLSL_TYPE_SAMPLER, then both dimx = 1 and dimy = 1. -- * If base_type is HLSL_TYPE_TEXTURE, then dimx = 4 and dimy = 1. -- * If base_type is HLSL_TYPE_UAV, then dimx is the dimx of e.resource_format, and dimy = 1. -- * Otherwise both dimx = 1 and dimy = 1. */ -+ */ - unsigned int dimx; - unsigned int dimy; - /* Sample count for HLSL_SAMPLER_DIM_2DMS or HLSL_SAMPLER_DIM_2DMSARRAY. */ -@@ -183,6 +172,11 @@ struct hlsl_type - - union - { -+ /* Additional information if type is numeric. */ -+ struct -+ { -+ enum hlsl_base_type type; -+ } numeric; - /* Additional information if type is HLSL_CLASS_STRUCT. */ - struct - { -@@ -196,8 +190,8 @@ struct hlsl_type - /* Array length, or HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT if it is not known yet at parse time. */ - unsigned int elements_count; - } array; -- /* Additional information if the base_type is HLSL_TYPE_TEXTURE or -- * HLSL_TYPE_UAV. */ -+ /* Additional information if the class is HLSL_CLASS_TEXTURE or -+ * HLSL_CLASS_UAV. */ - struct - { - /* Format of the data contained within the type. */ -@@ -298,6 +292,7 @@ enum hlsl_ir_node_type - HLSL_IR_STORE, - HLSL_IR_SWIZZLE, - HLSL_IR_SWITCH, -+ HLSL_IR_STATEBLOCK_CONSTANT, - }; - - /* Common data for every type of IR instruction node. */ -@@ -374,6 +369,8 @@ struct hlsl_attribute - #define HLSL_STORAGE_CENTROID 0x00004000 - #define HLSL_STORAGE_NOPERSPECTIVE 0x00008000 - #define HLSL_STORAGE_LINEAR 0x00010000 -+#define HLSL_MODIFIER_SINGLE 0x00020000 -+#define HLSL_MODIFIER_EXPORT 0x00040000 - - #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ - HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -@@ -393,7 +390,7 @@ struct hlsl_attribute - struct hlsl_reg_reservation - { - char reg_type; -- unsigned int reg_index; -+ unsigned int reg_space, reg_index; - - char offset_type; - unsigned int offset_index; -@@ -421,6 +418,14 @@ struct hlsl_ir_var - /* Scope that contains annotations for this variable. */ - struct hlsl_scope *annotations; - -+ /* A dynamic array containing the state block on the variable's declaration, if any. -+ * An array variable may contain multiple state blocks. -+ * A technique pass will always contain one. -+ * These are only really used for effect profiles. */ -+ struct hlsl_state_block **state_blocks; -+ unsigned int state_block_count; -+ size_t state_block_capacity; -+ - /* Indexes of the IR instructions where the variable is first written and last read (liveness - * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 - * means function entry. */ -@@ -442,9 +447,10 @@ struct hlsl_ir_var - enum hlsl_sampler_dim sampler_dim; - struct vkd3d_shader_location first_sampler_dim_loc; - } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; -- /* Minimum number of binds required to include all object components actually used in the shader. -- * It may be less than the allocation size, e.g. for texture arrays. */ -- unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1]; -+ /* Minimum number of binds required to include all components actually used in the shader. -+ * It may be less than the allocation size, e.g. for texture arrays. -+ * The bind_count for HLSL_REGSET_NUMERIC is only used in uniforms for now. 
*/ -+ unsigned int bind_count[HLSL_REGSET_LAST + 1]; - - /* Whether the shader performs dereferences with non-constant offsets in the variable. */ - bool indexable; -@@ -456,6 +462,40 @@ struct hlsl_ir_var - uint32_t is_separated_resource : 1; - }; - -+/* This struct is used to represent assignments in state block entries: -+ * name = {args[0], args[1], ...}; -+ * - or - -+ * name = args[0] -+ * - or - -+ * name[lhs_index] = args[0] -+ * - or - -+ * name[lhs_index] = {args[0], args[1], ...}; -+ */ -+struct hlsl_state_block_entry -+{ -+ /* For assignments, the name in the lhs. */ -+ char *name; -+ /* Resolved format-specific property identifier. */ -+ unsigned int name_id; -+ -+ /* Whether the lhs in the assignment is indexed and, in that case, its index. */ -+ bool lhs_has_index; -+ unsigned int lhs_index; -+ -+ /* Instructions present in the rhs. */ -+ struct hlsl_block *instrs; -+ -+ /* For assignments, arguments of the rhs initializer. */ -+ struct hlsl_src *args; -+ unsigned int args_count; -+}; -+ -+struct hlsl_state_block -+{ -+ struct hlsl_state_block_entry **entries; -+ size_t count, capacity; -+}; -+ - /* Sized array of variables representing a function's parameters. */ - struct hlsl_func_parameters - { -@@ -593,18 +633,15 @@ enum hlsl_ir_expr_op - HLSL_OP2_MUL, - HLSL_OP2_NEQUAL, - HLSL_OP2_RSHIFT, -+ /* SLT(a, b) retrieves 1.0 if (a < b), else 0.0. Only used for SM1-SM3 target vertex shaders. */ -+ HLSL_OP2_SLT, - - /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, - * then adds c. */ - HLSL_OP3_DP2ADD, -- /* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise. -- * TERNARY(a, b, c) returns c if a == 0 and b otherwise. -- * They differ for floating point numbers, because -- * -0.0 == 0.0, but it is not bitwise zero. CMP(a, b, c) returns b -- if a >= 0, and c otherwise. It's used only for SM1-SM3 targets, while -- SM4+ is using MOVC in such cases. */ -+ /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean. -+ * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ - HLSL_OP3_CMP, -- HLSL_OP3_MOVC, - HLSL_OP3_TERNARY, - }; - -@@ -750,6 +787,14 @@ struct hlsl_ir_constant - struct hlsl_reg reg; - }; - -+/* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, -+ * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ -+struct hlsl_ir_stateblock_constant -+{ -+ struct hlsl_ir_node node; -+ char *name; -+}; -+ - struct hlsl_scope - { - /* Item entry for hlsl_ctx.scopes. */ -@@ -798,10 +843,13 @@ struct hlsl_buffer - struct vkd3d_shader_location loc; - enum hlsl_buffer_type type; - const char *name; -+ uint32_t modifiers; - /* Register reserved for this buffer, if any. - * If provided, it should be of type 'b' if type is HLSL_BUFFER_CONSTANT and 't' if type is - * HLSL_BUFFER_TEXTURE. */ - struct hlsl_reg_reservation reservation; -+ /* Scope that contains annotations for this buffer. 
*/ -+ struct hlsl_scope *annotations; - /* Item entry for hlsl_ctx.buffers */ - struct list entry; - -@@ -920,8 +968,21 @@ struct hlsl_ctx - uint32_t found_numthreads : 1; - - bool semantic_compat_mapping; -+ bool child_effect; -+ bool include_empty_buffers; -+ bool warn_implicit_truncation; - }; - -+static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); -+} -+ -+static inline bool hlsl_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return !hlsl_version_ge(ctx, major, minor); -+} -+ - struct hlsl_resource_load_params - { - struct hlsl_type *format; -@@ -1009,6 +1070,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n - return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); - } - -+static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) -+{ -+ assert(node->type == HLSL_IR_STATEBLOCK_CONSTANT); -+ return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); -+} -+ - static inline void hlsl_block_init(struct hlsl_block *block) - { - list_init(&block->instrs); -@@ -1183,6 +1250,7 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const - - void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); - -+void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); - int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); -@@ -1201,6 +1269,7 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new); - void hlsl_free_attribute(struct hlsl_attribute *attr); - void hlsl_free_instr(struct hlsl_ir_node *node); - void hlsl_free_instr_list(struct list *list); -+void hlsl_free_state_block(struct hlsl_state_block *state_block); - void hlsl_free_type(struct hlsl_type *type); - void hlsl_free_var(struct hlsl_ir_var *decl); - -@@ -1222,7 +1291,8 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp - struct hlsl_ir_node *arg2); - struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); - struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, -- const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); -+ uint32_t modifiers, const struct hlsl_reg_reservation *reservation, struct hlsl_scope *annotations, -+ const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, - const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, -@@ -1243,6 +1313,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond - struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, - enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum 
hlsl_ir_expr_op op, -+ struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); - - void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); - -@@ -1279,6 +1351,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, - struct hlsl_struct_field *fields, size_t field_count); - struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, - struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, -+ struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, - struct hlsl_type *type, const struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, -@@ -1330,7 +1404,6 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int - bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); - - void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx); --void hlsl_prepend_global_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block); - - const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); - unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type); -@@ -1352,10 +1425,13 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - - bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); - bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); -+bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); - bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); - bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), - struct hlsl_block *block, void *context); - -+D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); -+D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); - bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); - bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 558506db108..88b917eff11 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -76,6 +76,7 @@ case {return KW_CASE; } - cbuffer {return KW_CBUFFER; } - centroid {return KW_CENTROID; } - column_major {return KW_COLUMN_MAJOR; } -+ComputeShader {return KW_COMPUTESHADER; } - compile {return KW_COMPILE; } - const {return KW_CONST; } - continue {return KW_CONTINUE; } -@@ -83,15 +84,18 @@ DepthStencilState {return KW_DEPTHSTENCILSTATE; } - DepthStencilView {return KW_DEPTHSTENCILVIEW; } - default {return KW_DEFAULT; } - discard {return KW_DISCARD; } -+DomainShader {return KW_DOMAINSHADER; } - do {return KW_DO; } - double {return KW_DOUBLE; } - else {return KW_ELSE; } -+export {return KW_EXPORT; } - extern {return KW_EXTERN; } - false {return KW_FALSE; } - for {return KW_FOR; } - fxgroup {return KW_FXGROUP; } - GeometryShader {return KW_GEOMETRYSHADER; } - groupshared {return KW_GROUPSHARED; } -+HullShader {return 
KW_HULLSHADER; } - if {return KW_IF; } - in {return KW_IN; } - inline {return KW_INLINE; } -@@ -105,7 +109,7 @@ out {return KW_OUT; } - packoffset {return KW_PACKOFFSET; } - pass {return KW_PASS; } - PixelShader {return KW_PIXELSHADER; } --precise {return KW_PRECISE; } -+pixelshader {return KW_PIXELSHADER; } - RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } - RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } - RasterizerOrderedTexture1D {return KW_RASTERIZERORDEREDTEXTURE1D; } -@@ -163,6 +167,7 @@ typedef {return KW_TYPEDEF; } - uniform {return KW_UNIFORM; } - vector {return KW_VECTOR; } - VertexShader {return KW_VERTEXSHADER; } -+vertexshader {return KW_VERTEXSHADER; } - void {return KW_VOID; } - volatile {return KW_VOLATILE; } - while {return KW_WHILE; } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index cd05fd008a6..9c1bdef926d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -77,6 +77,10 @@ struct parse_variable_def - struct hlsl_type *basic_type; - uint32_t modifiers; - struct vkd3d_shader_location modifiers_loc; -+ -+ struct hlsl_state_block **state_blocks; -+ unsigned int state_block_count; -+ size_t state_block_capacity; - }; - - struct parse_function -@@ -114,6 +118,12 @@ struct parse_attribute_list - const struct hlsl_attribute **attrs; - }; - -+struct state_block_index -+{ -+ bool has_index; -+ unsigned int index; -+}; -+ - } - - %code provides -@@ -158,6 +168,9 @@ static struct list *make_empty_list(struct hlsl_ctx *ctx) - - static void destroy_block(struct hlsl_block *block) - { -+ if (!block) -+ return; -+ - hlsl_block_cleanup(block); - vkd3d_free(block); - } -@@ -413,7 +426,7 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct - return NULL; - } - -- if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy) -+ if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy && ctx->warn_implicit_truncation) - hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", - src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); - -@@ -438,8 +451,9 @@ static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t - - static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) - { -- struct hlsl_ir_node *condition, *not, *iff, *jump; -+ struct hlsl_ir_node *condition, *cast, *not, *iff, *jump; - struct hlsl_block then_block; -+ struct hlsl_type *bool_type; - - /* E.g. "for (i = 0; ; ++i)". 
*/ - if (list_empty(&cond_block->instrs)) -@@ -449,7 +463,12 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co - - check_condition_type(ctx, condition); - -- if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) -+ bool_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); -+ if (!(cast = hlsl_new_cast(ctx, condition, bool_type, &condition->loc))) -+ return false; -+ hlsl_block_add_instr(cond_block, cast); -+ -+ if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, cast, &condition->loc))) - return false; - hlsl_block_add_instr(cond_block, not); - -@@ -640,6 +659,16 @@ static unsigned int initializer_size(const struct parse_initializer *initializer - return count; - } - -+static void cleanup_parse_attribute_list(struct parse_attribute_list *attr_list) -+{ -+ unsigned int i = 0; -+ -+ assert(attr_list); -+ for (i = 0; i < attr_list->count; ++i) -+ hlsl_free_attribute((struct hlsl_attribute *) attr_list->attrs[i]); -+ vkd3d_free(attr_list->attrs); -+} -+ - static void free_parse_initializer(struct parse_initializer *initializer) - { - destroy_block(initializer->instrs); -@@ -817,8 +846,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str - const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; - struct hlsl_ir_node *return_index, *cast; - -- if (expr_type->class == HLSL_CLASS_OBJECT -- && (expr_type->base_type == HLSL_TYPE_TEXTURE || expr_type->base_type == HLSL_TYPE_UAV) -+ if ((expr_type->class == HLSL_CLASS_TEXTURE || expr_type->class == HLSL_CLASS_UAV) - && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); -@@ -925,24 +953,10 @@ static void free_parse_variable_def(struct parse_variable_def *v) - vkd3d_free(v->arrays.sizes); - vkd3d_free(v->name); - hlsl_cleanup_semantic(&v->semantic); -+ assert(!v->state_blocks); - vkd3d_free(v); - } - --static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) --{ -- return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; --} -- --static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) --{ -- return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); --} -- --static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) --{ -- return !shader_profile_version_ge(ctx, major, minor); --} -- - static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - struct hlsl_type *type, uint32_t modifiers, struct list *defs) - { -@@ -965,7 +979,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - - field->type = type; - -- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) -+ if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) - { - for (k = 0; k < v->arrays.count; ++k) - unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -@@ -1115,7 +1129,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters - } - - static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *annotations, -- const struct vkd3d_shader_location *loc) -+ struct hlsl_state_block *state_block, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_var *var; - struct hlsl_type *type; -@@ -1125,6 +1139,11 @@ static bool add_pass(struct hlsl_ctx *ctx, const 
char *name, struct hlsl_scope * - return false; - var->annotations = annotations; - -+ var->state_blocks = hlsl_alloc(ctx, sizeof(*var->state_blocks)); -+ var->state_blocks[0] = state_block; -+ var->state_block_count = 1; -+ var->state_block_capacity = 1; -+ - if (!hlsl_add_var(ctx, var, false)) - { - struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); -@@ -1191,17 +1210,18 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl - return true; - } - --static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) -+static bool parse_reservation_index(const char *string, char *type, uint32_t *index) - { -- struct hlsl_reg_reservation reservation = {0}; -+ if (!sscanf(string + 1, "%u", index)) -+ return false; - -- if (!sscanf(reg_string + 1, "%u", &reservation.reg_index)) -- { -- FIXME("Unsupported register reservation syntax.\n"); -- return reservation; -- } -- reservation.reg_type = ascii_tolower(reg_string[0]); -- return reservation; -+ *type = ascii_tolower(string[0]); -+ return true; -+} -+ -+static bool parse_reservation_space(const char *string, uint32_t *space) -+{ -+ return !ascii_strncasecmp(string, "space", 5) && sscanf(string + 5, "%u", space); - } - - static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const char *reg_string, -@@ -1210,7 +1230,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const - struct hlsl_reg_reservation reservation = {0}; - char *endptr; - -- if (shader_profile_version_lt(ctx, 4, 0)) -+ if (hlsl_version_lt(ctx, 4, 0)) - return reservation; - - reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); -@@ -1273,7 +1293,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - struct hlsl_ir_node *node; - struct hlsl_block expr; - unsigned int ret = 0; -- bool progress; -+ struct hlsl_src src; - - LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) - { -@@ -1293,6 +1313,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_STORE: - case HLSL_IR_SWITCH: -+ case HLSL_IR_STATEBLOCK_CONSTANT: - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Expected literal expression."); - } -@@ -1309,13 +1330,12 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - return 0; - } - -- do -- { -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); -- progress |= hlsl_copy_propagation_execute(ctx, &expr); -- } while (progress); -+ /* Wrap the node into a src to allow the reference to survive the multiple const passes. 
*/ -+ hlsl_src_from_node(&src, node_from_block(&expr)); -+ hlsl_run_const_passes(ctx, &expr); -+ node = src.node; -+ hlsl_src_remove(&src); - -- node = node_from_block(&expr); - if (node->type == HLSL_IR_CONSTANT) - { - constant = hlsl_ir_constant(node); -@@ -1334,9 +1354,6 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - - static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) - { -- if (t1->base_type > HLSL_TYPE_LAST_SCALAR || t2->base_type > HLSL_TYPE_LAST_SCALAR) -- return false; -- - /* Scalar vars can be converted to pretty much everything */ - if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) - return true; -@@ -1368,10 +1385,6 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t - - static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hlsl_base_type t2) - { -- if (t1 > HLSL_TYPE_LAST_SCALAR || t2 > HLSL_TYPE_LAST_SCALAR) { -- FIXME("Unexpected base type.\n"); -- return HLSL_TYPE_FLOAT; -- } - if (t1 == t2) - return t1 == HLSL_TYPE_BOOL ? HLSL_TYPE_INT : t1; - if (t1 == HLSL_TYPE_DOUBLE || t2 == HLSL_TYPE_DOUBLE) -@@ -1475,7 +1488,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl - struct hlsl_ir_node *load; - struct hlsl_ir_var *var; - -- scalar_type = hlsl_get_scalar_type(ctx, type->base_type); -+ scalar_type = hlsl_get_scalar_type(ctx, type->e.numeric.type); - - if (!(var = hlsl_new_synthetic_var(ctx, "split_op", type, loc))) - return NULL; -@@ -1525,7 +1538,7 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * - const struct hlsl_type *type = instr->data_type; - struct vkd3d_string_buffer *string; - -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: -@@ -1575,13 +1588,13 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct - static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, - const struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); - enum hlsl_type_class type; -+ enum hlsl_base_type base; - unsigned int dimx, dimy; - - if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) - return NULL; -- -+ base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); - return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - } - -@@ -1618,14 +1631,15 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str - const struct vkd3d_shader_location *loc) - { - struct hlsl_type *common_type, *return_type; -- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); - enum hlsl_type_class type; -+ enum hlsl_base_type base; - unsigned int dimx, dimy; - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; - - if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) - return NULL; - -+ base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); - common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -@@ -1665,7 +1679,7 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct h - enum hlsl_ir_expr_op 
op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type base = arg1->data_type->base_type; -+ enum hlsl_base_type base = arg1->data_type->e.numeric.type; - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_type *return_type, *integer_type; - enum hlsl_type_class type; -@@ -1695,7 +1709,7 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct h - static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); -+ enum hlsl_base_type base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_type *common_type, *ret_type; - enum hlsl_ir_expr_op op; -@@ -1933,10 +1947,9 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - return NULL; - - resource_type = hlsl_deref_get_type(ctx, &resource_deref); -- assert(resource_type->class == HLSL_CLASS_OBJECT); -- assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV); -+ assert(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); - -- if (resource_type->base_type != HLSL_TYPE_UAV) -+ if (resource_type->class != HLSL_CLASS_UAV) - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Read-only resources cannot be stored to."); - -@@ -1947,7 +1960,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - "Resource store expressions must write to all components."); - - assert(coords->data_type->class == HLSL_CLASS_VECTOR); -- assert(coords->data_type->base_type == HLSL_TYPE_UINT); -+ assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); - assert(coords->data_type->dimx == dim_count); - - if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) -@@ -2085,24 +2098,23 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - } - } - --static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_struct) -+static bool type_has_object_components(const struct hlsl_type *type) - { -- if (type->class == HLSL_CLASS_OBJECT) -- return !must_be_in_struct; - if (type->class == HLSL_CLASS_ARRAY) -- return type_has_object_components(type->e.array.type, must_be_in_struct); -+ return type_has_object_components(type->e.array.type); - - if (type->class == HLSL_CLASS_STRUCT) - { -- unsigned int i; -- -- for (i = 0; i < type->e.record.field_count; ++i) -+ for (unsigned int i = 0; i < type->e.record.field_count; ++i) - { -- if (type_has_object_components(type->e.record.fields[i].type, false)) -+ if (type_has_object_components(type->e.record.fields[i].type)) - return true; - } -+ -+ return false; - } -- return false; -+ -+ return !hlsl_is_numeric_type(type); - } - - static bool type_has_numeric_components(struct hlsl_type *type) -@@ -2140,6 +2152,18 @@ static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int mo - } - } - -+static void check_invalid_object_fields(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) -+{ -+ const struct hlsl_type *type = var->data_type; -+ -+ while (type->class == HLSL_CLASS_ARRAY) -+ type = type->e.array.type; -+ -+ if (type->class == 
HLSL_CLASS_STRUCT && type_has_object_components(type)) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Target profile doesn't support objects as struct members in uniform variables."); -+} -+ - static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - { - struct hlsl_type *basic_type = v->basic_type; -@@ -2160,7 +2184,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - - type = basic_type; - -- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) -+ if (hlsl_version_ge(ctx, 5, 1) && hlsl_type_is_resource(type)) - { - for (i = 0; i < v->arrays.count; ++i) - unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -@@ -2265,12 +2289,8 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - if (!(modifiers & HLSL_STORAGE_STATIC)) - var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - -- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && -- type_has_object_components(var->data_type, true)) -- { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Target profile doesn't support objects as struct members in uniform variables."); -- } -+ if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ check_invalid_object_fields(ctx, var); - - if ((func = hlsl_get_first_func_decl(ctx, var->name))) - { -@@ -2306,7 +2326,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - } - - if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) -- && type_has_object_components(var->data_type, false)) -+ && type_has_object_components(var->data_type)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Static variables cannot have both numeric and resource components."); -@@ -2349,8 +2369,25 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - free_parse_variable_def(v); - continue; - } -+ - type = var->data_type; - -+ var->state_blocks = v->state_blocks; -+ var->state_block_count = v->state_block_count; -+ var->state_block_capacity = v->state_block_capacity; -+ v->state_block_count = 0; -+ v->state_block_capacity = 0; -+ v->state_blocks = NULL; -+ -+ if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected %u state blocks, but got %u.", -+ hlsl_type_component_count(type), var->state_block_count); -+ free_parse_variable_def(v); -+ continue; -+ } -+ - if (v->initializer.args_count) - { - if (v->initializer.braces) -@@ -2394,7 +2431,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - - /* Initialize statics to zero by default. 
*/ - -- if (type_has_object_components(var->data_type, false)) -+ if (type_has_object_components(var->data_type)) - { - free_parse_variable_def(v); - continue; -@@ -2562,7 +2599,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, - { - struct hlsl_type *type = arg->data_type; - -- if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) -+ if (type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF) - return arg; - - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -@@ -2589,7 +2626,7 @@ static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *p - static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type base = params->args[0]->data_type->base_type; -+ enum hlsl_base_type base = params->args[0]->data_type->e.numeric.type; - bool vectors = false, matrices = false; - unsigned int dimx = 4, dimy = 4; - struct hlsl_type *common_type; -@@ -2599,7 +2636,7 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * - { - struct hlsl_type *arg_type = params->args[i]->data_type; - -- base = expr_common_base_type(base, arg_type->base_type); -+ base = expr_common_base_type(base, arg_type->e.numeric.type); - - if (arg_type->class == HLSL_CLASS_VECTOR) - { -@@ -2650,12 +2687,14 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, - static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -+ enum hlsl_base_type base_type; - struct hlsl_type *type; - - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) - return false; - -- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ base_type = type->e.numeric.type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; -+ type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); - - return convert_args(ctx, params, type, loc); - } -@@ -2715,81 +2754,62 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, - return write_acos_or_asin(ctx, params, loc, false); - } - --static bool intrinsic_all(struct hlsl_ctx *ctx, -- const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+/* Find the type corresponding to the given source type, with the same -+ * dimensions but a different base type. 
*/ -+static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, -+ const struct hlsl_type *type, enum hlsl_base_type base_type) -+{ -+ return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); -+} -+ -+static bool add_combine_components(struct hlsl_ctx *ctx, const struct parse_initializer *params, -+ struct hlsl_ir_node *arg, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; -+ struct hlsl_ir_node *res, *load; - unsigned int i, count; - -- if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, one); -+ count = hlsl_type_component_count(arg->data_type); - -- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) -+ if (!(res = hlsl_add_load_component(ctx, params->instrs, arg, 0, loc))) - return false; -- hlsl_block_add_instr(params->instrs, zero); - -- mul = one; -- -- count = hlsl_type_component_count(arg->data_type); -- for (i = 0; i < count; ++i) -+ for (i = 1; i < count; ++i) - { - if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) - return false; - -- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) -- return false; -+ if (!(res = hlsl_new_binary_expr(ctx, op, res, load))) -+ return NULL; -+ hlsl_block_add_instr(params->instrs, res); - } - -- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); -+ return true; - } - --static bool intrinsic_any(struct hlsl_ctx *ctx, -+static bool intrinsic_all(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; -- unsigned int i, count; -+ struct hlsl_ir_node *arg = params->args[0], *cast; -+ struct hlsl_type *bool_type; - -- if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) -- { -- hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); -+ bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); -+ if (!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) - return false; -- } -- -- if (arg->data_type->base_type == HLSL_TYPE_FLOAT) -- { -- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, zero); - -- if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) -- return false; -- -- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); -- } -- else if (arg->data_type->base_type == HLSL_TYPE_BOOL) -- { -- if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) -- return false; -- hlsl_block_add_instr(params->instrs, bfalse); -- -- or = bfalse; -- -- count = hlsl_type_component_count(arg->data_type); -- for (i = 0; i < count; ++i) -- { -- if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) -- return false; -+ return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_AND, loc); -+} - -- if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) -- return false; -- } -+static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params, -+ const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg = params->args[0], *cast; -+ struct hlsl_type *bool_type; - -- return true; -- } -+ bool_type = convert_numeric_type(ctx, arg->data_type, HLSL_TYPE_BOOL); -+ if 
(!(cast = add_cast(ctx, params->instrs, arg, bool_type, loc))) -+ return false; - -- hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); -- return false; -+ return add_combine_components(ctx, params, cast, HLSL_OP2_LOGIC_OR, loc); - } - - static bool intrinsic_asin(struct hlsl_ctx *ctx, -@@ -2857,20 +2877,20 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, - type->name, type->name, type->name); - if (ret < 0) - { -- vkd3d_string_buffer_cleanup(buf); -+ hlsl_release_string_buffer(ctx, buf); - return false; - } - - ret = vkd3d_string_buffer_printf(buf, body_template, type->name); - if (ret < 0) - { -- vkd3d_string_buffer_cleanup(buf); -+ hlsl_release_string_buffer(ctx, buf); - return false; - } - - func = hlsl_compile_internal_function(ctx, - atan2_mode ? atan2_name : atan_name, buf->buffer); -- vkd3d_string_buffer_cleanup(buf); -+ hlsl_release_string_buffer(ctx, buf); - if (!func) - return false; - -@@ -2890,15 +2910,6 @@ static bool intrinsic_atan2(struct hlsl_ctx *ctx, - return write_atan_or_atan2(ctx, params, loc, true); - } - -- --/* Find the type corresponding to the given source type, with the same -- * dimensions but a different base type. */ --static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, -- const struct hlsl_type *type, enum hlsl_base_type base_type) --{ -- return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); --} -- - static bool intrinsic_asfloat(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2906,7 +2917,7 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, - struct hlsl_type *data_type; - - data_type = params->args[0]->data_type; -- if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) -+ if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) - { - struct vkd3d_string_buffer *string; - -@@ -2942,7 +2953,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, - } - - data_type = params->args[0]->data_type; -- if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) -+ if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) - { - struct vkd3d_string_buffer *string; - -@@ -3022,6 +3033,46 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_COS, arg, loc); - } - -+static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool sinh_mode) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_ir_node *arg; -+ const char *fn_name, *type_name; -+ char *body; -+ -+ static const char template[] = -+ "%s %s(%s x)\n" -+ "{\n" -+ " return (exp(x) %s exp(-x)) / 2;\n" -+ "}\n"; -+ static const char fn_name_sinh[] = "sinh"; -+ static const char fn_name_cosh[] = "cosh"; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ type_name = arg->data_type->name; -+ fn_name = sinh_mode ? fn_name_sinh : fn_name_cosh; -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, -+ type_name, fn_name, type_name, sinh_mode ? 
"-" : "+"))) -+ return false; -+ -+ func = hlsl_compile_internal_function(ctx, fn_name, body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return add_user_call(ctx, func, params, loc); -+} -+ -+static bool intrinsic_cosh(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return write_cosh_or_sinh(ctx, params, loc, false); -+} -+ - static bool intrinsic_cross(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3031,7 +3082,7 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, - struct hlsl_type *cast_type; - enum hlsl_base_type base; - -- if (arg1->data_type->base_type == HLSL_TYPE_HALF && arg2->data_type->base_type == HLSL_TYPE_HALF) -+ if (arg1->data_type->e.numeric.type == HLSL_TYPE_HALF && arg2->data_type->e.numeric.type == HLSL_TYPE_HALF) - base = HLSL_TYPE_HALF; - else - base = HLSL_TYPE_FLOAT; -@@ -3155,6 +3206,94 @@ static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); - } - -+static bool intrinsic_determinant(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ static const char determinant2x2[] = -+ "%s determinant(%s2x2 m)\n" -+ "{\n" -+ " return m._11 * m._22 - m._12 * m._21;\n" -+ "}"; -+ static const char determinant3x3[] = -+ "%s determinant(%s3x3 m)\n" -+ "{\n" -+ " %s2x2 m1 = { m._22, m._23, m._32, m._33 };\n" -+ " %s2x2 m2 = { m._21, m._23, m._31, m._33 };\n" -+ " %s2x2 m3 = { m._21, m._22, m._31, m._32 };\n" -+ " %s3 v1 = { m._11, -m._12, m._13 };\n" -+ " %s3 v2 = { determinant(m1), determinant(m2), determinant(m3) };\n" -+ " return dot(v1, v2);\n" -+ "}"; -+ static const char determinant4x4[] = -+ "%s determinant(%s4x4 m)\n" -+ "{\n" -+ " %s3x3 m1 = { m._22, m._23, m._24, m._32, m._33, m._34, m._42, m._43, m._44 };\n" -+ " %s3x3 m2 = { m._21, m._23, m._24, m._31, m._33, m._34, m._41, m._43, m._44 };\n" -+ " %s3x3 m3 = { m._21, m._22, m._24, m._31, m._32, m._34, m._41, m._42, m._44 };\n" -+ " %s3x3 m4 = { m._21, m._22, m._23, m._31, m._32, m._33, m._41, m._42, m._43 };\n" -+ " %s4 v1 = { m._11, -m._12, m._13, -m._14 };\n" -+ " %s4 v2 = { determinant(m1), determinant(m2), determinant(m3), determinant(m4) };\n" -+ " return dot(v1, v2);\n" -+ "}"; -+ static const char *templates[] = -+ { -+ [2] = determinant2x2, -+ [3] = determinant3x3, -+ [4] = determinant4x4, -+ }; -+ -+ struct hlsl_ir_node *arg = params->args[0]; -+ const struct hlsl_type *type = arg->data_type; -+ struct hlsl_ir_function_decl *func; -+ const char *typename, *template; -+ unsigned int dim; -+ char *body; -+ -+ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_MATRIX) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); -+ return false; -+ } -+ -+ dim = min(type->dimx, type->dimy); -+ if (dim == 1) -+ { -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) -+ return false; -+ return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); -+ } -+ -+ typename = type->e.numeric.type == HLSL_TYPE_HALF ? 
"half" : "float"; -+ template = templates[dim]; -+ -+ switch (dim) -+ { -+ case 2: -+ body = hlsl_sprintf_alloc(ctx, template, typename, typename); -+ break; -+ case 3: -+ body = hlsl_sprintf_alloc(ctx, template, typename, typename, typename, -+ typename, typename, typename, typename); -+ break; -+ case 4: -+ body = hlsl_sprintf_alloc(ctx, template, typename, typename, typename, -+ typename, typename, typename, typename, typename); -+ break; -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (!body) -+ return false; -+ -+ func = hlsl_compile_internal_function(ctx, "determinant", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return add_user_call(ctx, func, params, loc); -+} -+ - static bool intrinsic_distance(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3478,7 +3617,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1], *cast1, *cast2; -- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); -+ enum hlsl_base_type base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); - struct hlsl_type *cast_type1 = arg1->data_type, *cast_type2 = arg2->data_type, *matrix_type, *ret_type; - unsigned int i, j, k, vect_count = 0; - struct hlsl_deref var_deref; -@@ -3646,6 +3785,59 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, i, neg, loc); - } - -+static bool intrinsic_refract(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_type *r_type = params->args[0]->data_type; -+ struct hlsl_type *n_type = params->args[1]->data_type; -+ struct hlsl_type *i_type = params->args[2]->data_type; -+ struct hlsl_type *res_type, *idx_type, *scal_type; -+ struct parse_initializer mut_params; -+ struct hlsl_ir_function_decl *func; -+ enum hlsl_base_type base; -+ char *body; -+ -+ static const char template[] = -+ "%s refract(%s r, %s n, %s i)\n" -+ "{\n" -+ " %s d, t;\n" -+ " d = dot(r, n);\n" -+ " t = 1 - i.x * i.x * (1 - d * d);\n" -+ " return t >= 0.0 ? i.x * r - (i.x * d + sqrt(t)) * n : 0;\n" -+ "}"; -+ -+ if (r_type->class == HLSL_CLASS_MATRIX -+ || n_type->class == HLSL_CLASS_MATRIX -+ || i_type->class == HLSL_CLASS_MATRIX) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Matrix arguments are not supported."); -+ return false; -+ } -+ -+ assert(params->args_count == 3); -+ mut_params = *params; -+ mut_params.args_count = 2; -+ if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) -+ return false; -+ -+ base = expr_common_base_type(res_type->e.numeric.type, i_type->e.numeric.type); -+ base = base == HLSL_TYPE_HALF ? 
HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; -+ res_type = convert_numeric_type(ctx, res_type, base); -+ idx_type = convert_numeric_type(ctx, i_type, base); -+ scal_type = hlsl_get_scalar_type(ctx, base); -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, res_type->name, res_type->name, -+ res_type->name, idx_type->name, scal_type->name))) -+ return false; -+ -+ func = hlsl_compile_internal_function(ctx, "refract", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return add_user_call(ctx, func, params, loc); -+} -+ - static bool intrinsic_round(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3688,7 +3880,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, - struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, - arg->data_type->dimx, arg->data_type->dimy); - -- if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) -+ if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc))) - return false; - hlsl_block_add_instr(params->instrs, zero); - -@@ -3726,6 +3918,12 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); - } - -+static bool intrinsic_sinh(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return write_cosh_or_sinh(ctx, params, loc, true); -+} -+ - /* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ - static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -@@ -3798,6 +3996,39 @@ static bool intrinsic_tan(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, sin, cos, loc); - } - -+static bool intrinsic_tanh(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_ir_node *arg; -+ struct hlsl_type *type; -+ char *body; -+ -+ static const char template[] = -+ "%s tanh(%s x)\n" -+ "{\n" -+ " %s exp_pos, exp_neg;\n" -+ " exp_pos = exp(x);\n" -+ " exp_neg = exp(-x);\n" -+ " return (exp_pos - exp_neg) / (exp_pos + exp_neg);\n" -+ "}\n"; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ type = arg->data_type; -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, -+ type->name, type->name, type->name))) -+ return false; -+ -+ func = hlsl_compile_internal_function(ctx, "tanh", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return add_user_call(ctx, func, params, loc); -+} -+ - static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, - const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) - { -@@ -3818,7 +4049,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - } - - sampler_type = params->args[0]->data_type; -- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -+ if (sampler_type->class != HLSL_CLASS_SAMPLER - || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) - { - struct vkd3d_string_buffer *string; -@@ -3866,7 +4097,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - return false; - } - 
-- if (shader_profile_version_ge(ctx, 4, 0)) -+ if (hlsl_version_ge(ctx, 4, 0)) - { - unsigned int count = hlsl_sampler_dim_count(dim); - struct hlsl_ir_node *divisor; -@@ -3913,7 +4144,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - return false; - - initialize_var_components(ctx, params->instrs, var, &idx, coords); -- if (shader_profile_version_ge(ctx, 4, 0)) -+ if (hlsl_version_ge(ctx, 4, 0)) - { - if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) - return false; -@@ -4022,7 +4253,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - return true; - } - -- mat_type = hlsl_get_matrix_type(ctx, arg_type->base_type, arg_type->dimy, arg_type->dimx); -+ mat_type = hlsl_get_matrix_type(ctx, arg_type->e.numeric.type, arg_type->dimy, arg_type->dimx); - - if (!(var = hlsl_new_synthetic_var(ctx, "transpose", mat_type, loc))) - return false; -@@ -4099,7 +4330,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) - return false; - -- if (shader_profile_version_ge(ctx, 4, 0)) -+ if (hlsl_version_ge(ctx, 4, 0)) - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); - - return true; -@@ -4130,6 +4361,7 @@ intrinsic_functions[] = - {"clamp", 3, true, intrinsic_clamp}, - {"clip", 1, true, intrinsic_clip}, - {"cos", 1, true, intrinsic_cos}, -+ {"cosh", 1, true, intrinsic_cosh}, - {"cross", 2, true, intrinsic_cross}, - {"ddx", 1, true, intrinsic_ddx}, - {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, -@@ -4138,6 +4370,7 @@ intrinsic_functions[] = - {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, - {"ddy_fine", 1, true, intrinsic_ddy_fine}, - {"degrees", 1, true, intrinsic_degrees}, -+ {"determinant", 1, true, intrinsic_determinant}, - {"distance", 2, true, intrinsic_distance}, - {"dot", 2, true, intrinsic_dot}, - {"exp", 1, true, intrinsic_exp}, -@@ -4160,15 +4393,18 @@ intrinsic_functions[] = - {"pow", 2, true, intrinsic_pow}, - {"radians", 1, true, intrinsic_radians}, - {"reflect", 2, true, intrinsic_reflect}, -+ {"refract", 3, true, intrinsic_refract}, - {"round", 1, true, intrinsic_round}, - {"rsqrt", 1, true, intrinsic_rsqrt}, - {"saturate", 1, true, intrinsic_saturate}, - {"sign", 1, true, intrinsic_sign}, - {"sin", 1, true, intrinsic_sin}, -+ {"sinh", 1, true, intrinsic_sinh}, - {"smoothstep", 3, true, intrinsic_smoothstep}, - {"sqrt", 1, true, intrinsic_sqrt}, - {"step", 2, true, intrinsic_step}, - {"tan", 1, true, intrinsic_tan}, -+ {"tanh", 1, true, intrinsic_tanh}, - {"tex1D", -1, false, intrinsic_tex1D}, - {"tex2D", -1, false, intrinsic_tex2D}, - {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, -@@ -4263,22 +4499,7 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type - return NULL; - - for (i = 0; i < params->args_count; ++i) -- { -- struct hlsl_ir_node *arg = params->args[i]; -- -- if (arg->data_type->class == HLSL_CLASS_OBJECT) -- { -- struct vkd3d_string_buffer *string; -- -- if ((string = hlsl_type_to_string(ctx, arg->data_type))) -- hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid type %s for constructor argument.", string->buffer); -- hlsl_release_string_buffer(ctx, string); -- continue; -- } -- -- initialize_var_components(ctx, params->instrs, var, &idx, arg); -- } -+ initialize_var_components(ctx, params->instrs, var, &idx, params->args[i]); - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -@@ -4318,26 +4539,34 @@ static bool 
add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) - return false; - } -- else if (common_type->dimx == 1 && common_type->dimy == 1) -- { -- common_type = hlsl_get_numeric_type(ctx, cond_type->class, -- common_type->base_type, cond_type->dimx, cond_type->dimy); -- } -- else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) -+ else - { -- /* This condition looks wrong but is correct. -- * floatN is compatible with float1xN, but not with floatNx1. */ -- -- struct vkd3d_string_buffer *cond_string, *value_string; -+ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, -+ cond_type->dimx, cond_type->dimy); -+ if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) -+ return false; - -- cond_string = hlsl_type_to_string(ctx, cond_type); -- value_string = hlsl_type_to_string(ctx, common_type); -- if (cond_string && value_string) -- hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Ternary condition type '%s' is not compatible with value type '%s'.", -- cond_string->buffer, value_string->buffer); -- hlsl_release_string_buffer(ctx, cond_string); -- hlsl_release_string_buffer(ctx, value_string); -+ if (common_type->dimx == 1 && common_type->dimy == 1) -+ { -+ common_type = hlsl_get_numeric_type(ctx, cond_type->class, -+ common_type->e.numeric.type, cond_type->dimx, cond_type->dimy); -+ } -+ else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) -+ { -+ /* This condition looks wrong but is correct. -+ * floatN is compatible with float1xN, but not with floatNx1. */ -+ -+ struct vkd3d_string_buffer *cond_string, *value_string; -+ -+ cond_string = hlsl_type_to_string(ctx, cond_type); -+ value_string = hlsl_type_to_string(ctx, common_type); -+ if (cond_string && value_string) -+ hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Ternary condition type '%s' is not compatible with value type '%s'.", -+ cond_string->buffer, value_string->buffer); -+ hlsl_release_string_buffer(ctx, cond_string); -+ hlsl_release_string_buffer(ctx, value_string); -+ } - } - - if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) -@@ -4362,9 +4591,16 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - hlsl_release_string_buffer(ctx, second_string); - } - -+ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, -+ cond_type->dimx, cond_type->dimy); -+ if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) -+ return false; -+ - common_type = first->data_type; - } - -+ assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); -+ - args[0] = cond; - args[1] = first; - args[2] = second; -@@ -4490,8 +4726,7 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - } - - sampler_type = params->args[0]->data_type; -- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -+ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; - -@@ -4555,8 +4790,7 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * - } - - sampler_type = params->args[0]->data_type; -- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -- || 
sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) -+ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) - { - struct vkd3d_string_buffer *string; - -@@ -4666,8 +4900,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - } - - sampler_type = params->args[0]->data_type; -- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -+ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; - -@@ -4689,7 +4922,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; - -- load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->base_type, 4); -+ load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4); - load_params.resource = object; - load_params.sampler = params->args[0]; - -@@ -4903,8 +5136,7 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block * - } - - sampler_type = params->args[0]->data_type; -- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -+ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; - -@@ -4966,8 +5198,7 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block - } - - sampler_type = params->args[0]->data_type; -- if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -+ if (sampler_type->class != HLSL_CLASS_SAMPLER || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; - -@@ -5051,8 +5282,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru - const struct hlsl_type *object_type = object->data_type; - const struct method_function *method; - -- if (object_type->class != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE -- || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) -+ if (object_type->class != HLSL_CLASS_TEXTURE || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; - -@@ -5193,6 +5423,16 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - hlsl_release_string_buffer(ctx, string); - } - -+static bool state_block_add_entry(struct hlsl_state_block *state_block, struct hlsl_state_block_entry *entry) -+{ -+ if (!vkd3d_array_reserve((void **)&state_block->entries, &state_block->capacity, state_block->count + 1, -+ sizeof(*state_block->entries))) -+ return false; -+ -+ state_block->entries[state_block->count++] = entry; -+ return true; -+} -+ - } - - %locations -@@ -5233,6 +5473,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - struct parse_attribute_list attr_list; - struct hlsl_ir_switch_case *switch_case; - struct hlsl_scope *scope; -+ struct hlsl_state_block *state_block; -+ struct state_block_index state_block_index; - } - - %token KW_BLENDSTATE -@@ -5243,6 +5485,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_CENTROID - 
%token KW_COLUMN_MAJOR - %token KW_COMPILE -+%token KW_COMPUTESHADER - %token KW_CONST - %token KW_CONTINUE - %token KW_DEFAULT -@@ -5250,14 +5493,17 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_DEPTHSTENCILVIEW - %token KW_DISCARD - %token KW_DO -+%token KW_DOMAINSHADER - %token KW_DOUBLE - %token KW_ELSE -+%token KW_EXPORT - %token KW_EXTERN - %token KW_FALSE - %token KW_FOR - %token KW_FXGROUP - %token KW_GEOMETRYSHADER - %token KW_GROUPSHARED -+%token KW_HULLSHADER - %token KW_IF - %token KW_IN - %token KW_INLINE -@@ -5271,7 +5517,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_PACKOFFSET - %token KW_PASS - %token KW_PIXELSHADER --%token KW_PRECISE - %token KW_RASTERIZERORDEREDBUFFER - %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER - %token KW_RASTERIZERORDEREDTEXTURE1D -@@ -5429,6 +5674,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - - %type any_identifier - %type var_identifier -+%type stateblock_lhs_identifier - %type name_opt - - %type parameter -@@ -5436,13 +5682,17 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %type param_list - %type parameters - --%type register_opt --%type packoffset_opt -+%type register_reservation -+%type packoffset_reservation - - %type texture_type texture_ms_type uav_type rov_type - - %type semantic - -+%type state_block -+ -+%type state_block_index_opt -+ - %type switch_case - - %type field_type -@@ -5453,6 +5703,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %type type_no_void - %type typedef_type - -+%type state_block_list - %type type_spec - %type variable_decl - %type variable_def -@@ -5483,9 +5734,9 @@ name_opt: - | any_identifier - - pass: -- KW_PASS name_opt annotations_opt '{' '}' -+ KW_PASS name_opt annotations_opt '{' state_block_start state_block '}' - { -- if (!add_pass(ctx, $2, $3, &@1)) -+ if (!add_pass(ctx, $2, $3, $6, &@1)) - YYABORT; - } - -@@ -5535,10 +5786,6 @@ technique10: - struct hlsl_scope *scope = ctx->cur_scope; - hlsl_pop_scope(ctx); - -- if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT && ctx->profile->major_version == 2) -- hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "The 'technique10' keyword is invalid for this profile."); -- - if (!add_technique(ctx, $2, scope, $3, "technique10", &@1)) - YYABORT; - } -@@ -5580,12 +5827,12 @@ effect_group: - } - - buffer_declaration: -- buffer_type any_identifier colon_attribute -+ var_modifiers buffer_type any_identifier colon_attribute annotations_opt - { -- if ($3.semantic.name) -- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); -+ if ($4.semantic.name) -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); - -- if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, &@2))) -+ if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $2, $3, $1, &$4.reg_reservation, $5, &@3))) - YYABORT; - } - -@@ -5792,11 +6039,7 @@ attribute_list: - $$ = $1; - if (!(new_array = vkd3d_realloc($$.attrs, ($$.count + 1) * sizeof(*$$.attrs)))) - { -- unsigned int i; -- -- for (i = 0; i < $$.count; ++i) -- hlsl_free_attribute((void *)$$.attrs[i]); -- vkd3d_free($$.attrs); -+ cleanup_parse_attribute_list(&$$); - YYABORT; - } - $$.attrs = new_array; -@@ -5884,9 +6127,9 @@ func_prototype_no_attrs: - /* Functions are unconditionally inlined. 
*/ - modifiers &= ~HLSL_MODIFIER_INLINE; - -- if (modifiers & ~HLSL_MODIFIERS_MAJORITY_MASK) -+ if (modifiers & ~(HLSL_MODIFIERS_MAJORITY_MASK | HLSL_MODIFIER_EXPORT)) - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Only majority modifiers are allowed on functions."); -+ "Unexpected modifier used on a function."); - if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) - YYABORT; - if ((var = hlsl_get_var(ctx->globals, $3))) -@@ -6002,11 +6245,7 @@ func_prototype: - } - else - { -- unsigned int i; -- -- for (i = 0; i < $1.count; ++i) -- hlsl_free_attribute((void *)$1.attrs[i]); -- vkd3d_free($1.attrs); -+ cleanup_parse_attribute_list(&$1); - } - $$ = $2; - } -@@ -6060,12 +6299,12 @@ colon_attribute: - $$.reg_reservation.reg_type = 0; - $$.reg_reservation.offset_type = 0; - } -- | register_opt -+ | register_reservation - { - $$.semantic = (struct hlsl_semantic){0}; - $$.reg_reservation = $1; - } -- | packoffset_opt -+ | packoffset_reservation - { - $$.semantic = (struct hlsl_semantic){0}; - $$.reg_reservation = $1; -@@ -6087,22 +6326,57 @@ semantic: - } - - /* FIXME: Writemasks */ --register_opt: -+register_reservation: - ':' KW_REGISTER '(' any_identifier ')' - { -- $$ = parse_reg_reservation($4); -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $4); -+ - vkd3d_free($4); - } - | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' - { -- FIXME("Ignoring shader target %s in a register reservation.\n", debugstr_a($4)); -+ memset(&$$, 0, sizeof($$)); -+ if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) -+ { -+ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); -+ } -+ else if (parse_reservation_space($6, &$$.reg_space)) -+ { -+ if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $4); -+ } -+ else -+ { -+ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register or space reservation '%s'.", $6); -+ } -+ - vkd3d_free($4); -+ vkd3d_free($6); -+ } -+ | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' -+ { -+ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); -+ -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) -+ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $6); -+ -+ if (!parse_reservation_space($8, &$$.reg_space)) -+ hlsl_error(ctx, &@8, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register space reservation '%s'.", $8); - -- $$ = parse_reg_reservation($6); -+ vkd3d_free($4); - vkd3d_free($6); -+ vkd3d_free($8); - } - --packoffset_opt: -+packoffset_reservation: - ':' KW_PACKOFFSET '(' any_identifier ')' - { - $$ = parse_packoffset(ctx, $4, NULL, &@$); -@@ -6307,7 +6581,7 @@ type_no_void: - YYABORT; - } - -- $$ = hlsl_type_clone(ctx, hlsl_get_vector_type(ctx, $3->base_type, $5), 0, 0); -+ $$ = hlsl_type_clone(ctx, hlsl_get_vector_type(ctx, $3->e.numeric.type, $5), 0, 0); - $$->is_minimum_precision = $3->is_minimum_precision; - } - | KW_VECTOR -@@ -6340,7 +6614,7 @@ type_no_void: - YYABORT; - } - -- $$ = hlsl_type_clone(ctx, hlsl_get_matrix_type(ctx, $3->base_type, $7, $5), 0, 0); -+ $$ = hlsl_type_clone(ctx, hlsl_get_matrix_type(ctx, $3->e.numeric.type, $7, $5), 0, 0); - 
$$->is_minimum_precision = $3->is_minimum_precision; - } - | KW_MATRIX -@@ -6388,7 +6662,7 @@ type_no_void: - { - validate_texture_format_type(ctx, $3, &@3); - -- if (shader_profile_version_lt(ctx, 4, 1)) -+ if (hlsl_version_lt(ctx, 4, 1)) - { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); -@@ -6427,7 +6701,7 @@ type_no_void: - $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); - if ($$->is_minimum_precision) - { -- if (shader_profile_version_lt(ctx, 4, 0)) -+ if (hlsl_version_lt(ctx, 4, 0)) - { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Target profile doesn't support minimum-precision types."); -@@ -6454,6 +6728,14 @@ type_no_void: - { - $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); - } -+ | KW_VERTEXSHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "VertexShader", true, true); -+ } -+ | KW_PIXELSHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); -+ } - - type: - type_no_void -@@ -6583,22 +6865,97 @@ variable_decl: - $$->reg_reservation = $3.reg_reservation; - } - --state: -- any_identifier '=' expr ';' -+state_block_start: -+ %empty - { -- vkd3d_free($1); -- destroy_block($3); -+ ctx->in_state_block = 1; - } - --state_block_start: -+stateblock_lhs_identifier: -+ any_identifier -+ { -+ $$ = $1; -+ } -+ | KW_PIXELSHADER -+ { -+ if (!($$ = hlsl_strdup(ctx, "pixelshader"))) -+ YYABORT; -+ } -+ | KW_VERTEXSHADER -+ { -+ if (!($$ = hlsl_strdup(ctx, "vertexshader"))) -+ YYABORT; -+ } -+ -+state_block_index_opt: - %empty - { -- ctx->in_state_block = 1; -+ $$.has_index = false; -+ $$.index = 0; - } -+ | '[' C_INTEGER ']' -+ { -+ if ($2 < 0) -+ { -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, -+ "State block array index is not a positive integer constant."); -+ YYABORT; -+ } -+ $$.has_index = true; -+ $$.index = $2; -+ } - - state_block: - %empty -- | state_block state -+ { -+ if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) -+ YYABORT; -+ } -+ | state_block stateblock_lhs_identifier state_block_index_opt '=' complex_initializer ';' -+ { -+ struct hlsl_state_block_entry *entry; -+ unsigned int i; -+ -+ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) -+ YYABORT; -+ -+ entry->name = $2; -+ entry->lhs_has_index = $3.has_index; -+ entry->lhs_index = $3.index; -+ -+ entry->instrs = $5.instrs; -+ -+ entry->args_count = $5.args_count; -+ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) -+ YYABORT; -+ for (i = 0; i < entry->args_count; ++i) -+ hlsl_src_from_node(&entry->args[i], $5.args[i]); -+ vkd3d_free($5.args); -+ -+ $$ = $1; -+ state_block_add_entry($$, entry); -+ } -+ -+state_block_list: -+ '{' state_block '}' -+ { -+ if (!($$ = hlsl_alloc(ctx, sizeof(*$$)))) -+ YYABORT; -+ -+ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, -+ $$->state_block_count + 1, sizeof(*$$->state_blocks)))) -+ YYABORT; -+ $$->state_blocks[$$->state_block_count++] = $2; -+ } -+ | state_block_list ',' '{' state_block '}' -+ { -+ $$ = $1; -+ -+ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, -+ $$->state_block_count + 1, sizeof(*$$->state_blocks)))) -+ YYABORT; -+ $$->state_blocks[$$->state_block_count++] = $4; -+ } - - variable_def: - variable_decl -@@ -6611,6 +6968,24 @@ variable_def: - { - $$ = $1; - ctx->in_state_block = 0; -+ -+ if(!(vkd3d_array_reserve((void **)&$$->state_blocks, &$$->state_block_capacity, -+ $$->state_block_count + 1, 
sizeof(*$$->state_blocks)))) -+ YYABORT; -+ $$->state_blocks[$$->state_block_count++] = $4; -+ } -+ | variable_decl '{' state_block_start state_block_list '}' -+ { -+ $$ = $1; -+ ctx->in_state_block = 0; -+ -+ $$->state_blocks = $4->state_blocks; -+ $$->state_block_count = $4->state_block_count; -+ $$->state_block_capacity = $4->state_block_capacity; -+ $4->state_blocks = NULL; -+ $4->state_block_count = 0; -+ $4->state_block_capacity = 0; -+ free_parse_variable_def($4); - } - - variable_def_typed: -@@ -6727,10 +7102,6 @@ var_modifiers: - { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOPERSPECTIVE, &@1); - } -- | KW_PRECISE var_modifiers -- { -- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); -- } - | KW_SHARED var_modifiers - { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); -@@ -6779,7 +7150,20 @@ var_modifiers: - { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); - } -- -+ | KW_EXPORT var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_EXPORT, &@1); -+ } -+ | var_identifier var_modifiers -+ { -+ if (!strcmp($1, "precise")) -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); -+ else if (!strcmp($1, "single")) -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SINGLE, &@1); -+ else -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER, -+ "Unknown modifier %s.", debugstr_a($1)); -+ } - - complex_initializer: - initializer_expr -@@ -6978,6 +7362,7 @@ selection_statement: - { - destroy_block($6.then_block); - destroy_block($6.else_block); -+ cleanup_parse_attribute_list(&$1); - YYABORT; - } - -@@ -6985,10 +7370,12 @@ selection_statement: - { - destroy_block($6.then_block); - destroy_block($6.else_block); -+ cleanup_parse_attribute_list(&$1); - YYABORT; - } - destroy_block($6.then_block); - destroy_block($6.else_block); -+ cleanup_parse_attribute_list(&$1); - - $$ = $4; - hlsl_block_add_instr($$, instr); -@@ -7011,21 +7398,25 @@ loop_statement: - { - $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); - hlsl_pop_scope(ctx); -+ cleanup_parse_attribute_list(&$1); - } - | attribute_list_optional loop_scope_start KW_DO statement KW_WHILE '(' expr ')' ';' - { - $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); - hlsl_pop_scope(ctx); -+ cleanup_parse_attribute_list(&$1); - } - | attribute_list_optional loop_scope_start KW_FOR '(' expr_statement expr_statement expr_optional ')' statement - { - $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); - hlsl_pop_scope(ctx); -+ cleanup_parse_attribute_list(&$1); - } - | attribute_list_optional loop_scope_start KW_FOR '(' declaration expr_statement expr_optional ')' statement - { - $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); - hlsl_pop_scope(ctx); -+ cleanup_parse_attribute_list(&$1); - } - - switch_statement: -@@ -7038,6 +7429,7 @@ switch_statement: - { - destroy_switch_cases($8); - destroy_block($5); -+ cleanup_parse_attribute_list(&$1); - YYABORT; - } - -@@ -7048,6 +7440,7 @@ switch_statement: - if (!s) - { - destroy_block($5); -+ cleanup_parse_attribute_list(&$1); - YYABORT; - } - -@@ -7055,6 +7448,7 @@ switch_statement: - hlsl_block_add_instr($$, s); - - hlsl_pop_scope(ctx); -+ cleanup_parse_attribute_list(&$1); - } - - switch_case: -@@ -7227,15 +7621,13 @@ primary_expr: - { - if (ctx->in_state_block) - { -- struct hlsl_ir_load *load; -- struct hlsl_ir_var *var; -+ struct hlsl_ir_node *constant; - -- if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", -- hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) -- YYABORT; -- if (!(load = 
hlsl_new_var_load(ctx, var, &@1))) -+ if (!(constant = hlsl_new_stateblock_constant(ctx, $1, &@1))) - YYABORT; -- if (!($$ = make_block(ctx, &load->node))) -+ vkd3d_free($1); -+ -+ if (!($$ = make_block(ctx, constant))) - YYABORT; - } - else -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 307f86f55b7..27f16af51c5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -263,8 +263,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls - if (type1->dimx != type2->dimx) - return false; - -- return base_type_get_semantic_equivalent(type1->base_type) -- == base_type_get_semantic_equivalent(type2->base_type); -+ return base_type_get_semantic_equivalent(type1->e.numeric.type) -+ == base_type_get_semantic_equivalent(type2->e.numeric.type); - } - - static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -@@ -355,10 +355,10 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - if (!semantic->name) - return; - -- vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); -+ vector_type_dst = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); - vector_type_src = vector_type_dst; - if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) -- vector_type_src = hlsl_get_vector_type(ctx, type->base_type, 4); -+ vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); - - for (i = 0; i < hlsl_type_major_size(type); ++i) - { -@@ -427,7 +427,10 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * - { - field = &type->e.record.fields[i]; - if (hlsl_type_is_resource(field->type)) -+ { -+ hlsl_fixme(ctx, &field->loc, "Prepend uniform copies for resource components within structs."); - continue; -+ } - validate_field_semantic(ctx, field); - semantic = &field->semantic; - elem_semantic_index = semantic->index; -@@ -497,7 +500,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - if (!semantic->name) - return; - -- vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); -+ vector_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); - - for (i = 0; i < hlsl_type_major_size(type); ++i) - { -@@ -1098,7 +1101,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - struct hlsl_ir_node *resource_load; - - assert(coords->data_type->class == HLSL_CLASS_VECTOR); -- assert(coords->data_type->base_type == HLSL_TYPE_UINT); -+ assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); - assert(coords->data_type->dimx == dim_count); - - if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) -@@ -1188,7 +1191,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s - { - struct hlsl_ir_node *new_cast, *swizzle; - -- dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->base_type); -+ dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->e.numeric.type); - /* We need to preserve the cast since it might be doing more than just - * turning the scalar into a vector. 
*/ - if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_scalar_type, &cast->node.loc))) -@@ -1562,7 +1565,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, - var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), - new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); - -- if (instr->data_type->class != HLSL_CLASS_OBJECT) -+ if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR) - { - struct hlsl_ir_node *swizzle_node; - -@@ -1622,7 +1625,11 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - { - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: -- case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: - break; - - case HLSL_CLASS_MATRIX: -@@ -1631,6 +1638,15 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - /* FIXME: Actually we shouldn't even get here, but we don't split - * matrices yet. */ - return false; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_VOID: -+ vkd3d_unreachable(); - } - - if (copy_propagation_replace_with_constant_vector(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) -@@ -1739,7 +1755,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s - { - unsigned int writemask = store->writemask; - -- if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) -+ if (!hlsl_is_numeric_type(store->rhs.node->data_type)) - writemask = VKD3DSP_WRITEMASK_0; - copy_propagation_set_value(ctx, var_def, start, writemask, store->rhs.node, store->node.index); - } -@@ -2049,7 +2065,7 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - src_type = expr->operands[0].node->data_type; - - if (hlsl_types_are_equal(src_type, dst_type) -- || (src_type->base_type == dst_type->base_type && is_vec1(src_type) && is_vec1(dst_type))) -+ || (src_type->e.numeric.type == dst_type->e.numeric.type && is_vec1(src_type) && is_vec1(dst_type))) - { - hlsl_replace_node(&expr->node, expr->operands[0].node); - return true; -@@ -2176,7 +2192,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - type = rhs->data_type; - if (type->class != HLSL_CLASS_MATRIX) - return false; -- element_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); -+ element_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); - - if (rhs->type != HLSL_IR_LOAD) - { -@@ -2213,7 +2229,7 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins - { - struct hlsl_ir_node *new_cast, *swizzle; - -- dst_vector_type = hlsl_get_vector_type(ctx, dst_type->base_type, src_type->dimx); -+ dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->dimx); - /* We need to preserve the cast since it might be doing more than just - * narrowing the vector. */ - if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) -@@ -2467,7 +2483,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir - - op = HLSL_OP2_DOT; - if (type->dimx == 1) -- op = type->base_type == HLSL_TYPE_BOOL ? 
HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; -+ op = type->e.numeric.type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; - - /* Note: We may be creating a DOT for bool vectors here, which we need to lower to - * LOGIC_OR + LOGIC_AND. */ -@@ -2603,8 +2619,8 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - - hlsl_copy_deref(ctx, &load->sampler, &load->resource); - load->resource.var = var; -- assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); -- assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); -+ assert(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE); -+ assert(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER); - - return true; - } -@@ -2647,10 +2663,11 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) - return false; - } - --/* Append a FLOOR before a CAST to int or uint (which is written as a mere MOV). */ -+/* Turn CAST to int or uint into FLOOR + REINTERPRET (which is written as a mere MOV). */ - static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -- struct hlsl_ir_node *arg, *floor, *cast2; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; -+ struct hlsl_ir_node *arg, *floor, *res; - struct hlsl_ir_expr *expr; - - if (instr->type != HLSL_IR_EXPR) -@@ -2660,22 +2677,20 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return false; - - arg = expr->operands[0].node; -- if (instr->data_type->base_type != HLSL_TYPE_INT && instr->data_type->base_type != HLSL_TYPE_UINT) -- return false; -- if (arg->data_type->base_type != HLSL_TYPE_FLOAT && arg->data_type->base_type != HLSL_TYPE_HALF) -+ if (instr->data_type->e.numeric.type != HLSL_TYPE_INT && instr->data_type->e.numeric.type != HLSL_TYPE_UINT) - return false; -- -- /* Check that the argument is not already a FLOOR */ -- if (arg->type == HLSL_IR_EXPR && hlsl_ir_expr(arg)->op == HLSL_OP1_FLOOR) -+ if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF) - return false; - - if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc))) - return false; - hlsl_block_add_instr(block, floor); - -- if (!(cast2 = hlsl_new_cast(ctx, floor, instr->data_type, &instr->loc))) -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = floor; -+ if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) - return false; -- hlsl_block_add_instr(block, cast2); -+ hlsl_block_add_instr(block, res); - - return true; - } -@@ -2903,12 +2918,60 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct - return true; - } - --/* Use 'movc' for the ternary operator. */ -+static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; -+ struct hlsl_ir_node *arg, *arg_cast, *neg, *one, *sub, *res; -+ struct hlsl_constant_value one_value; -+ struct hlsl_type *float_type; -+ struct hlsl_ir_expr *expr; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP1_LOGIC_NOT) -+ return false; -+ -+ arg = expr->operands[0].node; -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); -+ -+ /* If this is happens, it means we failed to cast the argument to boolean somewhere. 
*/ -+ assert(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); -+ -+ if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &arg->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg_cast); -+ -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg_cast, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg); -+ -+ one_value.u[0].f = 1.0; -+ one_value.u[1].f = 1.0; -+ one_value.u[2].f = 1.0; -+ one_value.u[3].f = 1.0; -+ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, one); -+ -+ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg))) -+ return false; -+ hlsl_block_add_instr(block, sub); -+ -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = sub; -+ if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, res); -+ -+ return true; -+} -+ -+/* Lower TERNARY to CMP for SM1. */ - static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement; -- struct hlsl_ir_node *zero, *cond, *first, *second; -- struct hlsl_constant_value zero_value = { 0 }; -+ struct hlsl_ir_node *cond, *first, *second, *float_cond, *neg; - struct hlsl_ir_expr *expr; - struct hlsl_type *type; - -@@ -2929,55 +2992,282 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - return false; - } - -- if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); -+ -+ type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, -+ instr->data_type->dimx, instr->data_type->dimy); -+ -+ if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, float_cond); -+ -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, float_cond, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg); -+ -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = neg; -+ operands[1] = second; -+ operands[2] = first; -+ if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) -+ return false; -+ -+ hlsl_block_add_instr(block, replacement); -+ return true; -+} -+ -+static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, -+ struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *arg1, *arg1_cast, *arg2, *arg2_cast, *slt, *res, *ret; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; -+ struct hlsl_type *float_type; -+ struct hlsl_ir_expr *expr; -+ bool negate = false; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP2_EQUAL && expr->op != HLSL_OP2_NEQUAL && expr->op != HLSL_OP2_LESS -+ && expr->op != HLSL_OP2_GEQUAL) -+ return false; -+ -+ arg1 = expr->operands[0].node; -+ arg2 = expr->operands[1].node; -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ -+ if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg1_cast); -+ -+ if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg2_cast); -+ -+ switch (expr->op) -+ { -+ case HLSL_OP2_EQUAL: -+ case HLSL_OP2_NEQUAL: -+ { -+ struct hlsl_ir_node *neg, *sub, *abs, *abs_neg; -+ -+ if (!(neg = 
hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg); -+ -+ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) -+ return false; -+ hlsl_block_add_instr(block, sub); -+ -+ if (ctx->profile->major_version >= 3) -+ { -+ if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, sub, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, abs); -+ } -+ else -+ { -+ /* Use MUL as a precarious ABS. */ -+ if (!(abs = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub))) -+ return false; -+ hlsl_block_add_instr(block, abs); -+ } -+ -+ if (!(abs_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, abs_neg); -+ -+ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, abs_neg, abs))) -+ return false; -+ hlsl_block_add_instr(block, slt); -+ -+ negate = (expr->op == HLSL_OP2_EQUAL); -+ break; -+ } -+ -+ case HLSL_OP2_GEQUAL: -+ case HLSL_OP2_LESS: -+ { -+ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg1_cast, arg2_cast))) -+ return false; -+ hlsl_block_add_instr(block, slt); -+ -+ negate = (expr->op == HLSL_OP2_GEQUAL); -+ break; -+ } -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (negate) - { -- struct hlsl_ir_node *abs, *neg; -+ struct hlsl_constant_value one_value; -+ struct hlsl_ir_node *one, *slt_neg; - -- if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc))) -+ one_value.u[0].f = 1.0; -+ one_value.u[1].f = 1.0; -+ one_value.u[2].f = 1.0; -+ one_value.u[3].f = 1.0; -+ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) - return false; -- hlsl_block_add_instr(block, abs); -+ hlsl_block_add_instr(block, one); - -- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) -+ if (!(slt_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) - return false; -- hlsl_block_add_instr(block, neg); -+ hlsl_block_add_instr(block, slt_neg); - -- operands[0] = neg; -- operands[1] = second; -- operands[2] = first; -- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) -+ if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, slt_neg))) - return false; -- } -- else if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) -- { -- hlsl_fixme(ctx, &instr->loc, "Ternary operator is not implemented for %s profile.", ctx->profile->name); -- return false; -+ hlsl_block_add_instr(block, res); - } - else - { -- if (cond->data_type->base_type == HLSL_TYPE_FLOAT) -- { -- if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, zero); -+ res = slt; -+ } - -- operands[0] = zero; -- operands[1] = cond; -- type = cond->data_type; -- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); -- if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc))) -- return false; -- hlsl_block_add_instr(block, cond); -- } -+ /* We need a REINTERPRET so that the HLSL IR code is valid. SLT and its arguments must be FLOAT, -+ * and casts to BOOL have already been lowered to "!= 0". 
*/ -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = res; -+ if (!(ret = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, ret); -+ -+ return true; -+} -+ -+/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to -+ * CMP instructions (only available in pixel shaders). -+ * Based on the following equivalence: -+ * SLT(x, y) -+ * = (x < y) ? 1.0 : 0.0 -+ * = ((x - y) >= 0) ? 0.0 : 1.0 -+ * = CMP(x - y, 0.0, 1.0) -+ */ -+static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp; -+ struct hlsl_constant_value zero_value, one_value; -+ struct hlsl_type *float_type; -+ struct hlsl_ir_expr *expr; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP2_SLT) -+ return false; -+ -+ arg1 = expr->operands[0].node; -+ arg2 = expr->operands[1].node; -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ -+ if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg1_cast); -+ -+ if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg2_cast); -+ -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg); -+ -+ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) -+ return false; -+ hlsl_block_add_instr(block, sub); - -- memset(operands, 0, sizeof(operands)); -- operands[0] = cond; -- operands[1] = first; -- operands[2] = second; -- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc))) -+ memset(&zero_value, 0, sizeof(zero_value)); -+ if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, zero); -+ -+ one_value.u[0].f = 1.0; -+ one_value.u[1].f = 1.0; -+ one_value.u[2].f = 1.0; -+ one_value.u[3].f = 1.0; -+ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, one); -+ -+ if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one))) -+ return false; -+ hlsl_block_add_instr(block, cmp); -+ -+ return true; -+} -+ -+/* Intended to be used for SM1-SM3, lowers CMP instructions (only available in pixel shaders) to -+ * SLT instructions (only available in vertex shaders). -+ * Based on the following equivalence: -+ * CMP(x, y, z) -+ * = (x >= 0) ? y : z -+ * = z * ((x < 0) ? 1.0 : 0.0) + y * ((x < 0) ? 
0.0 : 1.0) -+ * = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0)) -+ */ -+static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *args[3], *args_cast[3], *slt, *neg_slt, *sub, *zero, *one, *mul1, *mul2, *add; -+ struct hlsl_constant_value zero_value, one_value; -+ struct hlsl_type *float_type; -+ struct hlsl_ir_expr *expr; -+ unsigned int i; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP3_CMP) -+ return false; -+ -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ -+ for (i = 0; i < 3; ++i) -+ { -+ args[i] = expr->operands[i].node; -+ -+ if (!(args_cast[i] = hlsl_new_cast(ctx, args[i], float_type, &instr->loc))) - return false; -+ hlsl_block_add_instr(block, args_cast[i]); - } - -- hlsl_block_add_instr(block, replacement); -+ memset(&zero_value, 0, sizeof(zero_value)); -+ if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, zero); -+ -+ one_value.u[0].f = 1.0; -+ one_value.u[1].f = 1.0; -+ one_value.u[2].f = 1.0; -+ one_value.u[3].f = 1.0; -+ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, one); -+ -+ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, args_cast[0], zero))) -+ return false; -+ hlsl_block_add_instr(block, slt); -+ -+ if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[2], slt))) -+ return false; -+ hlsl_block_add_instr(block, mul1); -+ -+ if (!(neg_slt = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, neg_slt); -+ -+ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg_slt))) -+ return false; -+ hlsl_block_add_instr(block, sub); -+ -+ if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[1], sub))) -+ return false; -+ hlsl_block_add_instr(block, mul2); -+ -+ if (!(add = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul1, mul2))) -+ return false; -+ hlsl_block_add_instr(block, add); -+ - return true; - } - -@@ -2996,7 +3286,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - arg_type = expr->operands[0].node->data_type; - if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) - return false; -- if (type->base_type != HLSL_TYPE_BOOL) -+ if (type->e.numeric.type != HLSL_TYPE_BOOL) - return false; - - /* Narrowing casts should have already been lowered. 
*/ -@@ -3018,11 +3308,21 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) - { -+ struct hlsl_type *cond_type = condition->data_type; - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; - struct hlsl_ir_node *cond; - - assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); - -+ if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) -+ { -+ cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy); -+ -+ if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc))) -+ return NULL; -+ hlsl_block_add_instr(instrs, condition); -+ } -+ - operands[0] = condition; - operands[1] = if_true; - operands[2] = if_false; -@@ -3050,7 +3350,7 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return false; - if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; -- if (type->base_type != HLSL_TYPE_INT) -+ if (type->e.numeric.type != HLSL_TYPE_INT) - return false; - utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); - -@@ -3116,7 +3416,7 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return false; - if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; -- if (type->base_type != HLSL_TYPE_INT) -+ if (type->e.numeric.type != HLSL_TYPE_INT) - return false; - utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); - -@@ -3175,7 +3475,7 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - return false; - if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; -- if (type->base_type != HLSL_TYPE_INT) -+ if (type->e.numeric.type != HLSL_TYPE_INT) - return false; - - arg = expr->operands[0].node; -@@ -3206,14 +3506,14 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - if (expr->op != HLSL_OP2_DOT) - return false; - -- if (type->base_type == HLSL_TYPE_INT || type->base_type == HLSL_TYPE_UINT -- || type->base_type == HLSL_TYPE_BOOL) -+ if (type->e.numeric.type == HLSL_TYPE_INT || type->e.numeric.type == HLSL_TYPE_UINT -+ || type->e.numeric.type == HLSL_TYPE_BOOL) - { - arg1 = expr->operands[0].node; - arg2 = expr->operands[1].node; - assert(arg1->data_type->dimx == arg2->data_type->dimx); - dimx = arg1->data_type->dimx; -- is_bool = type->base_type == HLSL_TYPE_BOOL; -+ is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; - - if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? 
HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) - return false; -@@ -3259,7 +3559,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return false; - if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; -- if (type->base_type != HLSL_TYPE_FLOAT) -+ if (type->e.numeric.type != HLSL_TYPE_FLOAT) - return false; - btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); - -@@ -3308,6 +3608,63 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return true; - } - -+static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_expr *expr; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op == HLSL_OP1_CAST || instr->data_type->e.numeric.type == HLSL_TYPE_FLOAT) -+ return false; -+ -+ switch (expr->op) -+ { -+ case HLSL_OP1_ABS: -+ case HLSL_OP1_NEG: -+ case HLSL_OP2_ADD: -+ case HLSL_OP2_DIV: -+ case HLSL_OP2_LOGIC_AND: -+ case HLSL_OP2_LOGIC_OR: -+ case HLSL_OP2_MAX: -+ case HLSL_OP2_MIN: -+ case HLSL_OP2_MUL: -+ { -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_ir_node *arg, *arg_cast, *float_expr, *ret; -+ struct hlsl_type *float_type; -+ unsigned int i; -+ -+ for (i = 0; i < HLSL_MAX_OPERANDS; ++i) -+ { -+ arg = expr->operands[i].node; -+ if (!arg) -+ continue; -+ -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); -+ if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, arg_cast); -+ -+ operands[i] = arg_cast; -+ } -+ -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); -+ if (!(float_expr = hlsl_new_expr(ctx, expr->op, operands, float_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, float_expr); -+ -+ if (!(ret = hlsl_new_cast(ctx, float_expr, instr->data_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, ret); -+ -+ return true; -+ } -+ default: -+ return false; -+ } -+} -+ - static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; -@@ -3402,6 +3759,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_SWITCH: - break; -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ /* Stateblock constants should not appear in the shader program. */ -+ vkd3d_unreachable(); - } - - return false; -@@ -3457,9 +3817,6 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - { - unsigned int r; - -- if (!hlsl_type_is_resource(var->data_type)) -- continue; -- - if (var->reg_reservation.reg_type) - { - for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) -@@ -3493,6 +3850,22 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - } - } - -+static void deref_mark_last_read(struct hlsl_deref *deref, unsigned int last_read) -+{ -+ unsigned int i; -+ -+ if (hlsl_deref_is_lowered(deref)) -+ { -+ if (deref->rel_offset.node) -+ deref->rel_offset.node->last_read = last_read; -+ } -+ else -+ { -+ for (i = 0; i < deref->path_len; ++i) -+ deref->path[i].node->last_read = last_read; -+ } -+} -+ - /* Compute the earliest and latest liveness for each variable. 
In the case that - * a variable is accessed inside of a loop, we promote its liveness to extend - * to at least the range of the entire loop. We also do this for nodes, so that -@@ -3512,6 +3885,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - case HLSL_IR_CALL: - /* We should have inlined all calls before computing liveness. */ - vkd3d_unreachable(); -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ /* Stateblock constants should not appear in the shader program. */ -+ vkd3d_unreachable(); - - case HLSL_IR_STORE: - { -@@ -3521,8 +3897,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - if (!var->first_write) - var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; - store->rhs.node->last_read = last_read; -- if (store->lhs.rel_offset.node) -- store->lhs.rel_offset.node->last_read = last_read; -+ deref_mark_last_read(&store->lhs, last_read); - break; - } - case HLSL_IR_EXPR: -@@ -3549,8 +3924,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - - var = load->src.var; - var->last_read = max(var->last_read, last_read); -- if (load->src.rel_offset.node) -- load->src.rel_offset.node->last_read = last_read; -+ deref_mark_last_read(&load->src, last_read); - break; - } - case HLSL_IR_LOOP: -@@ -3567,14 +3941,12 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - - var = load->resource.var; - var->last_read = max(var->last_read, last_read); -- if (load->resource.rel_offset.node) -- load->resource.rel_offset.node->last_read = last_read; -+ deref_mark_last_read(&load->resource, last_read); - - if ((var = load->sampler.var)) - { - var->last_read = max(var->last_read, last_read); -- if (load->sampler.rel_offset.node) -- load->sampler.rel_offset.node->last_read = last_read; -+ deref_mark_last_read(&load->sampler, last_read); - } - - if (load->coords.node) -@@ -3599,8 +3971,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - - var = store->resource.var; - var->last_read = max(var->last_read, last_read); -- if (store->resource.rel_offset.node) -- store->resource.rel_offset.node->last_read = last_read; -+ deref_mark_last_read(&store->resource, last_read); - store->coords.node->last_read = last_read; - store->value.node->last_read = last_read; - break; -@@ -3877,34 +4248,67 @@ static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hls - return false; - } - --static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static void register_deref_usage(struct hlsl_ctx *ctx, struct hlsl_deref *deref) - { -- struct hlsl_ir_resource_load *load; -- struct hlsl_ir_var *var; -- enum hlsl_regset regset; -+ struct hlsl_ir_var *var = deref->var; -+ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); -+ uint32_t required_bind_count; -+ struct hlsl_type *type; - unsigned int index; - -- if (instr->type != HLSL_IR_RESOURCE_LOAD) -- return false; -- -- load = hlsl_ir_resource_load(instr); -- var = load->resource.var; -+ if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index)) -+ return; - -- regset = hlsl_deref_get_regset(ctx, &load->resource); -+ if (regset <= HLSL_REGSET_LAST_OBJECT) -+ { -+ var->objects_usage[regset][index].used = true; -+ var->bind_count[regset] = max(var->bind_count[regset], index + 1); -+ } -+ else if (regset == HLSL_REGSET_NUMERIC) -+ { -+ type = hlsl_deref_get_type(ctx, deref); - -- if (!hlsl_regset_index_from_deref(ctx, 
&load->resource, regset, &index)) -- return false; -+ hlsl_regset_index_from_deref(ctx, deref, regset, &index); -+ required_bind_count = align(index + type->reg_size[regset], 4) / 4; -+ var->bind_count[regset] = max(var->bind_count[regset], required_bind_count); -+ } -+ else -+ { -+ vkd3d_unreachable(); -+ } -+} - -- var->objects_usage[regset][index].used = true; -- var->bind_count[regset] = max(var->bind_count[regset], index + 1); -- if (load->sampler.var) -+static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ switch (instr->type) - { -- var = load->sampler.var; -- if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) -- return false; -+ case HLSL_IR_LOAD: -+ { -+ struct hlsl_ir_load *load = hlsl_ir_load(instr); -+ -+ if (!load->src.var->is_uniform) -+ return false; - -- var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; -- var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); -+ /* These are handled by validate_static_object_references(). */ -+ if (hlsl_deref_get_regset(ctx, &load->src) != HLSL_REGSET_NUMERIC) -+ return false; -+ -+ register_deref_usage(ctx, &load->src); -+ break; -+ } -+ -+ case HLSL_IR_RESOURCE_LOAD: -+ register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->resource); -+ if (hlsl_ir_resource_load(instr)->sampler.var) -+ register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->sampler); -+ break; -+ -+ case HLSL_IR_RESOURCE_STORE: -+ register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource); -+ break; -+ -+ default: -+ break; - } - - return false; -@@ -4083,7 +4487,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - continue; - value = &constant->value.u[i++]; - -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - f = !!value->u; -@@ -4149,16 +4553,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - } - } - -+static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort) -+{ -+ struct hlsl_ir_var *var; -+ -+ list_remove(&to_sort->extern_entry); -+ -+ LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) -+ { -+ uint32_t to_sort_size = to_sort->bind_count[HLSL_REGSET_NUMERIC]; -+ uint32_t var_size = var->bind_count[HLSL_REGSET_NUMERIC]; -+ -+ if (to_sort_size > var_size) -+ { -+ list_add_before(&var->extern_entry, &to_sort->extern_entry); -+ return; -+ } -+ } -+ -+ list_add_tail(sorted, &to_sort->extern_entry); -+} -+ -+static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) -+{ -+ struct list sorted = LIST_INIT(sorted); -+ struct hlsl_ir_var *var, *next; -+ -+ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_uniform) -+ sort_uniform_by_numeric_bind_count(&sorted, var); -+ } -+ list_move_tail(&ctx->extern_vars, &sorted); -+} -+ - static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { - struct register_allocator allocator = {0}; - struct hlsl_ir_var *var; - -+ sort_uniforms_by_numeric_bind_count(ctx); -+ - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; - -- if (!var->is_uniform || !var->last_read || reg_size == 0) -+ if (!var->is_uniform || reg_size == 0) - continue; - - if (var->reg_reservation.reg_type == 'c') -@@ -4189,15 +4629,14 @@ static void 
allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; - -- if (!var->is_uniform || !var->last_read || reg_size == 0) -+ if (!var->is_uniform || alloc_size == 0) - continue; - - if (!var->regs[HLSL_REGSET_NUMERIC].allocated) - { -- var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, -- 1, UINT_MAX, var->data_type); -+ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size); - TRACE("Allocated %s to %s.\n", var->name, - debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); - } -@@ -4435,7 +4874,9 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) - continue; - - if (var1->reg_reservation.offset_type -- || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type)) -+ || var1->reg_reservation.reg_type == 's' -+ || var1->reg_reservation.reg_type == 't' -+ || var1->reg_reservation.reg_type == 'u') - buffer->manually_packed_elements = true; - else - buffer->automatically_packed_elements = true; -@@ -4674,7 +5115,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - - /* We should always have generated a cast to UINT. */ - assert(path_node->data_type->class == HLSL_CLASS_SCALAR -- && path_node->data_type->base_type == HLSL_TYPE_UINT); -+ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - - idx = hlsl_ir_constant(path_node)->value.u[0].u; - -@@ -4729,14 +5170,15 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - return true; - } - -+/* Retrieves true if the index is constant, and false otherwise. In the latter case, the maximum -+ * possible index is retrieved, assuming there is not out-of-bounds access. */ - bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, - enum hlsl_regset regset, unsigned int *index) - { - struct hlsl_type *type = deref->var->data_type; -+ bool index_is_constant = true; - unsigned int i; - -- assert(regset <= HLSL_REGSET_LAST_OBJECT); -- - *index = 0; - - for (i = 0; i < deref->path_len; ++i) -@@ -4745,37 +5187,62 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref - unsigned int idx = 0; - - assert(path_node); -- if (path_node->type != HLSL_IR_CONSTANT) -- return false; -+ if (path_node->type == HLSL_IR_CONSTANT) -+ { -+ /* We should always have generated a cast to UINT. */ -+ assert(path_node->data_type->class == HLSL_CLASS_SCALAR -+ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - -- /* We should always have generated a cast to UINT. 
*/ -- assert(path_node->data_type->class == HLSL_CLASS_SCALAR -- && path_node->data_type->base_type == HLSL_TYPE_UINT); -+ idx = hlsl_ir_constant(path_node)->value.u[0].u; - -- idx = hlsl_ir_constant(path_node)->value.u[0].u; -+ switch (type->class) -+ { -+ case HLSL_CLASS_ARRAY: -+ if (idx >= type->e.array.elements_count) -+ return false; - -- switch (type->class) -+ *index += idx * type->e.array.type->reg_size[regset]; -+ break; -+ -+ case HLSL_CLASS_STRUCT: -+ *index += type->e.record.fields[idx].reg_offset[regset]; -+ break; -+ -+ case HLSL_CLASS_MATRIX: -+ *index += 4 * idx; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ } -+ else - { -- case HLSL_CLASS_ARRAY: -- if (idx >= type->e.array.elements_count) -- return false; -+ index_is_constant = false; - -- *index += idx * type->e.array.type->reg_size[regset]; -- break; -+ switch (type->class) -+ { -+ case HLSL_CLASS_ARRAY: -+ idx = type->e.array.elements_count - 1; -+ *index += idx * type->e.array.type->reg_size[regset]; -+ break; - -- case HLSL_CLASS_STRUCT: -- *index += type->e.record.fields[idx].reg_offset[regset]; -- break; -+ case HLSL_CLASS_MATRIX: -+ idx = hlsl_type_major_size(type) - 1; -+ *index += idx * 4; -+ break; - -- default: -- vkd3d_unreachable(); -+ default: -+ vkd3d_unreachable(); -+ } - } - - type = hlsl_get_element_type_from_path_index(ctx, type, path_node); - } - -- assert(type->reg_size[regset] == 1); -- return true; -+ assert(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); -+ assert(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); -+ return index_is_constant; - } - - bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) -@@ -4790,7 +5257,7 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref - { - /* We should always have generated a cast to UINT. 
*/ - assert(offset_node->data_type->class == HLSL_CLASS_SCALAR -- && offset_node->data_type->base_type == HLSL_TYPE_UINT); -+ && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - assert(offset_node->type != HLSL_IR_CONSTANT); - return false; - } -@@ -4857,7 +5324,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a - const struct hlsl_ir_constant *constant; - - if (type->class != HLSL_CLASS_SCALAR -- || (type->base_type != HLSL_TYPE_INT && type->base_type != HLSL_TYPE_UINT)) -+ || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) - { - struct vkd3d_string_buffer *string; - -@@ -4876,8 +5343,8 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a - } - constant = hlsl_ir_constant(instr); - -- if ((type->base_type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) -- || (type->base_type == HLSL_TYPE_UINT && !constant->value.u[0].u)) -+ if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) -+ || (type->e.numeric.type == HLSL_TYPE_UINT && !constant->value.u[0].u)) - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, - "Thread count must be a positive integer."); - -@@ -4885,25 +5352,6 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a - } - } - --static bool type_has_object_components(struct hlsl_type *type) --{ -- if (type->class == HLSL_CLASS_OBJECT) -- return true; -- if (type->class == HLSL_CLASS_ARRAY) -- return type_has_object_components(type->e.array.type); -- if (type->class == HLSL_CLASS_STRUCT) -- { -- unsigned int i; -- -- for (i = 0; i < type->e.record.field_count; ++i) -- { -- if (type_has_object_components(type->e.record.fields[i].type)) -- return true; -- } -- } -- return false; --} -- - static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) - { - struct hlsl_ir_node *instr, *next; -@@ -4960,15 +5408,42 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod - } - } - --void hlsl_prepend_global_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *body) -+void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - { -- struct hlsl_ir_var *var; -+ bool progress; - -- LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) -+ lower_ir(ctx, lower_matrix_swizzles, body); -+ lower_ir(ctx, lower_index_loads, body); -+ -+ lower_ir(ctx, lower_broadcasts, body); -+ while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); -+ do - { -- if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -- prepend_uniform_copy(ctx, body, var); -+ progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); -+ progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); - } -+ while (progress); -+ hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); -+ -+ lower_ir(ctx, lower_narrowing_casts, body); -+ lower_ir(ctx, lower_int_dot, body); -+ lower_ir(ctx, lower_int_division, body); -+ lower_ir(ctx, lower_int_modulus, body); -+ lower_ir(ctx, lower_int_abs, body); -+ lower_ir(ctx, lower_casts_to_bool, body); -+ lower_ir(ctx, lower_float_modulus, body); -+ hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); -+ -+ do -+ { -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, 
body); -+ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -+ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); -+ progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); -+ } while (progress); - } - - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -@@ -4979,7 +5454,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - struct recursive_call_ctx recursive_call_ctx; - struct hlsl_ir_var *var; - unsigned int i; -- bool progress; - - list_move_head(&body->instrs, &ctx->static_initializers.instrs); - -@@ -4999,7 +5473,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_matrix_swizzles, body); - lower_ir(ctx, lower_index_loads, body); - -- hlsl_prepend_global_uniform_copy(ctx, body); -+ LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -+ prepend_uniform_copy(ctx, body, var); -+ } - - for (i = 0; i < entry_func->parameters.count; ++i) - { -@@ -5011,9 +5489,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - } - else - { -- if (type_has_object_components(var->data_type)) -- hlsl_fixme(ctx, &var->loc, "Prepend uniform copies for object components within structs."); -- - if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT - && !var->semantic.name) - { -@@ -5056,34 +5531,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - { - hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); - } -- lower_ir(ctx, lower_broadcasts, body); -- while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); -- do -- { -- progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); -- progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); -- } -- while (progress); -- hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); - -- lower_ir(ctx, lower_narrowing_casts, body); -- lower_ir(ctx, lower_casts_to_bool, body); -- lower_ir(ctx, lower_int_dot, body); -- lower_ir(ctx, lower_int_division, body); -- lower_ir(ctx, lower_int_modulus, body); -- lower_ir(ctx, lower_int_abs, body); -- lower_ir(ctx, lower_float_modulus, body); -- hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); -- do -- { -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); -- progress |= hlsl_copy_propagation_execute(ctx, body); -- progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -- progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); -- progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); -- } -- while (progress); -+ hlsl_run_const_passes(ctx, body); -+ - remove_unreachable_code(ctx, body); - hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL); - -@@ -5095,12 +5545,23 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); - if (profile->major_version >= 4) - hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); -- hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); -+ -+ do -+ compute_liveness(ctx, entry_func); -+ while (hlsl_transform_ir(ctx, dce, body, NULL)); -+ -+ hlsl_transform_ir(ctx, track_components_usage, body, NULL); - 
sort_synthetic_separated_samplers_first(ctx); - -- lower_ir(ctx, lower_ternary, body); - if (profile->major_version < 4) - { -+ lower_ir(ctx, lower_ternary, body); -+ -+ lower_ir(ctx, lower_nonfloat_exprs, body); -+ /* Constants casted to float must be folded, and new casts to bool also need to be lowered. */ -+ hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -+ lower_ir(ctx, lower_casts_to_bool, body); -+ - lower_ir(ctx, lower_casts_to_int, body); - lower_ir(ctx, lower_division, body); - lower_ir(ctx, lower_sqrt, body); -@@ -5108,6 +5569,12 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_round, body); - lower_ir(ctx, lower_ceil, body); - lower_ir(ctx, lower_floor, body); -+ lower_ir(ctx, lower_comparison_operators, body); -+ lower_ir(ctx, lower_logic_not, body); -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ lower_ir(ctx, lower_slt, body); -+ else -+ lower_ir(ctx, lower_cmp, body); - } - - if (profile->major_version < 2) -@@ -5117,6 +5584,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - - lower_ir(ctx, validate_nonconstant_vector_store_derefs, body); - -+ do -+ compute_liveness(ctx, entry_func); -+ while (hlsl_transform_ir(ctx, dce, body, NULL)); -+ - /* TODO: move forward, remove when no longer needed */ - transform_derefs(ctx, replace_deref_path_with_offset, body); - while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index b76b1fce507..16015fa8a81 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -25,10 +25,10 @@ - static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -116,10 +116,10 @@ static int32_t double_to_int(double x) - static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -158,7 +158,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src->node.data_type->base_type) -+ switch (src->node.data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -200,7 +200,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - vkd3d_unreachable(); - } - -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -231,10 +231,10 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum 
hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -257,10 +257,10 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -283,10 +283,10 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -309,11 +309,11 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - float i; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -336,10 +336,10 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -381,10 +381,10 @@ static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -415,10 +415,10 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -439,10 +439,10 @@ static bool fold_not(struct hlsl_ctx *ctx, struct 
hlsl_constant_value *dst, - static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -484,10 +484,10 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -524,10 +524,10 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -550,10 +550,10 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -595,11 +595,11 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -632,11 +632,11 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < 
dst_type->dimx; ++k) - { -@@ -659,11 +659,11 @@ static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -686,11 +686,11 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const - static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -712,11 +712,11 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - assert(src1->node.data_type->dimx == src2->node.data_type->dimx); - - dst->u[0].f = 0.0f; -@@ -740,12 +740,12 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -- assert(type == src3->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); -+ assert(type == src3->node.data_type->e.numeric.type); - assert(src1->node.data_type->dimx == src2->node.data_type->dimx); - assert(src3->node.data_type->dimx == 1); - -@@ -771,11 +771,11 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, - const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ 
assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -841,12 +841,12 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co - { - unsigned int k; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -877,12 +877,12 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -916,12 +916,12 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - { - unsigned int k; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -955,14 +955,14 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->base_type == src1->node.data_type->base_type); -- assert(src2->node.data_type->base_type == HLSL_TYPE_INT); -+ assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -+ assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); - - for (k = 0; k < dst_type->dimx; ++k) - { - unsigned int shift = src2->value.u[k].u % 32; - -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_INT: - dst->u[k].i = src1->value.u[k].i << shift; -@@ -983,11 +983,11 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1021,11 +1021,11 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = 
dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1060,11 +1060,11 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, - const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1102,11 +1102,11 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1139,12 +1139,12 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -1175,32 +1175,13 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - { - unsigned int k; - -- assert(dst_type->base_type == src2->node.data_type->base_type); -- assert(dst_type->base_type == src3->node.data_type->base_type); -+ assert(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ assert(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); -+ assert(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); - - for (k = 0; k < dst_type->dimx; ++k) -- { -- switch (src1->node.data_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- dst->u[k] = src1->value.u[k].f != 0.0f ? src2->value.u[k] : src3->value.u[k]; -- break; -- -- case HLSL_TYPE_DOUBLE: -- dst->u[k] = src1->value.u[k].d != 0.0 ? src2->value.u[k] : src3->value.u[k]; -- break; -+ dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; - -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- case HLSL_TYPE_BOOL: -- dst->u[k] = src1->value.u[k].u ? 
src2->value.u[k] : src3->value.u[k]; -- break; -- -- default: -- vkd3d_unreachable(); -- } -- } - return true; - } - -@@ -1209,14 +1190,14 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->base_type == src1->node.data_type->base_type); -- assert(src2->node.data_type->base_type == HLSL_TYPE_INT); -+ assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -+ assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); - - for (k = 0; k < dst_type->dimx; ++k) - { - unsigned int shift = src2->value.u[k].u % 32; - -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_INT: - dst->u[k].i = src1->value.u[k].i >> shift; -@@ -1415,6 +1396,136 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return success; - } - -+static bool constant_is_zero(struct hlsl_ir_constant *const_arg) -+{ -+ struct hlsl_type *data_type = const_arg->node.data_type; -+ unsigned int k; -+ -+ for (k = 0; k < data_type->dimx; ++k) -+ { -+ switch (data_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (const_arg->value.u[k].f != 0.0f) -+ return false; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (const_arg->value.u[k].d != 0.0) -+ return false; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_BOOL: -+ if (const_arg->value.u[k].u != 0) -+ return false; -+ break; -+ -+ default: -+ return false; -+ } -+ } -+ return true; -+} -+ -+static bool constant_is_one(struct hlsl_ir_constant *const_arg) -+{ -+ struct hlsl_type *data_type = const_arg->node.data_type; -+ unsigned int k; -+ -+ for (k = 0; k < data_type->dimx; ++k) -+ { -+ switch (data_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (const_arg->value.u[k].f != 1.0f) -+ return false; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (const_arg->value.u[k].d != 1.0) -+ return false; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_BOOL: -+ if (const_arg->value.u[k].u != 1) -+ return false; -+ break; -+ -+ default: -+ return false; -+ } -+ } -+ return true; -+} -+ -+bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_constant *const_arg = NULL; -+ struct hlsl_ir_node *mut_arg = NULL; -+ struct hlsl_ir_node *res_node; -+ struct hlsl_ir_expr *expr; -+ unsigned int i; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ -+ if (instr->data_type->class > HLSL_CLASS_VECTOR) -+ return false; -+ -+ /* Verify that the expression has two operands. 
*/ -+ for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) -+ { -+ if (!!expr->operands[i].node != (i < 2)) -+ return false; -+ } -+ -+ if (expr->operands[0].node->type == HLSL_IR_CONSTANT) -+ { -+ const_arg = hlsl_ir_constant(expr->operands[0].node); -+ mut_arg = expr->operands[1].node; -+ } -+ else if (expr->operands[1].node->type == HLSL_IR_CONSTANT) -+ { -+ mut_arg = expr->operands[0].node; -+ const_arg = hlsl_ir_constant(expr->operands[1].node); -+ } -+ else -+ { -+ return false; -+ } -+ -+ res_node = NULL; -+ switch (expr->op) -+ { -+ case HLSL_OP2_ADD: -+ if (constant_is_zero(const_arg)) -+ res_node = mut_arg; -+ break; -+ -+ case HLSL_OP2_MUL: -+ if (constant_is_one(const_arg)) -+ res_node = mut_arg; -+ break; -+ -+ default: -+ break; -+ } -+ -+ if (res_node) -+ { -+ hlsl_replace_node(&expr->node, res_node); -+ return true; -+ } -+ return false; -+} -+ - bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - struct hlsl_constant_value value; -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index f0bd85338c6..9514ddb980f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -17,9 +17,11 @@ - */ - - #include "vkd3d_shader_private.h" -+#include "vkd3d_types.h" - - bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) - { -+ memset(program, 0, sizeof(*program)); - program->shader_version = *version; - return shader_instruction_array_init(&program->instructions, reserve); - } -@@ -32,6 +34,9 @@ void vsir_program_cleanup(struct vsir_program *program) - vkd3d_free((void *)program->block_names[i]); - vkd3d_free(program->block_names); - shader_instruction_array_destroy(&program->instructions); -+ shader_signature_cleanup(&program->input_signature); -+ shader_signature_cleanup(&program->output_signature); -+ shader_signature_cleanup(&program->patch_constant_signature); - } - - static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) -@@ -53,19 +58,6 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i - vsir_instruction_init(ins, &location, VKD3DSIH_NOP); - } - --static void remove_dcl_temps(struct vsir_program *program) --{ -- unsigned int i; -- -- for (i = 0; i < program->instructions.count; ++i) -- { -- struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -- -- if (ins->handler_idx == VKD3DSIH_DCL_TEMPS) -- vkd3d_shader_instruction_make_nop(ins); -- } --} -- - static bool vsir_instruction_init_with_params(struct vsir_program *program, - struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) -@@ -91,86 +83,164 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, - return true; - } - --static enum vkd3d_result instruction_array_lower_texkills(struct vkd3d_shader_parser *parser) -+static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program, -+ struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx) - { -- struct vsir_program *program = &parser->program; -+ const unsigned int components_read = 3 + (program->shader_version.major >= 2); - struct vkd3d_shader_instruction_array *instructions = &program->instructions; -- struct vkd3d_shader_instruction *texkill_ins, *ins; -- unsigned int components_read = 3 + (program->shader_version.major >= 2); -- 
unsigned int tmp_idx = ~0u; -- unsigned int i, k; -- -- for (i = 0; i < instructions->count; ++i) -- { -- texkill_ins = &instructions->elements[i]; -+ size_t pos = texkill - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int j; - -- if (texkill_ins->handler_idx != VKD3DSIH_TEXKILL) -- continue; -+ if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- if (!shader_instruction_array_insert_at(instructions, i + 1, components_read + 1)) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ if (*tmp_idx == ~0u) -+ *tmp_idx = program->temp_count++; - -- if (tmp_idx == ~0u) -- tmp_idx = program->temp_count++; -+ /* tmp = ins->dst[0] < 0 */ - -- /* tmp = ins->dst[0] < 0 */ -+ ins = &instructions->elements[pos + 1]; -+ if (!vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_LTO, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - -- ins = &instructions->elements[i + 1]; -- if (!vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_LTO, 1, 2)) -+ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].reg.idx[0].offset = *tmp_idx; -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; -+ -+ ins->src[0].reg = texkill->dst[0].reg; -+ ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); -+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[1].reg.u.immconst_f32[0] = 0.0f; -+ ins->src[1].reg.u.immconst_f32[1] = 0.0f; -+ ins->src[1].reg.u.immconst_f32[2] = 0.0f; -+ ins->src[1].reg.u.immconst_f32[3] = 0.0f; -+ -+ /* tmp.x = tmp.x || tmp.y */ -+ /* tmp.x = tmp.x || tmp.z */ -+ /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ -+ -+ for (j = 1; j < components_read; ++j) -+ { -+ ins = &instructions->elements[pos + 1 + j]; -+ if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_OR, 1, 2))) - return VKD3D_ERROR_OUT_OF_MEMORY; - - vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->dst[0].reg.idx[0].offset = tmp_idx; -- ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; -+ ins->dst[0].reg.idx[0].offset = *tmp_idx; -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; - -- ins->src[0].reg = texkill_ins->dst[0].reg; -- vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); -+ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].reg.idx[0].offset = *tmp_idx; -+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->src[1].reg.u.immconst_f32[0] = 0.0f; -- ins->src[1].reg.u.immconst_f32[1] = 0.0f; -- ins->src[1].reg.u.immconst_f32[2] = 0.0f; -- ins->src[1].reg.u.immconst_f32[3] = 0.0f; -+ ins->src[1].reg.idx[0].offset = *tmp_idx; -+ ins->src[1].swizzle = vkd3d_shader_create_swizzle(j, j, j, j); -+ } - -- /* tmp.x = tmp.x || tmp.y */ -- /* tmp.x = tmp.x || tmp.z */ -- /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ -+ /* discard_nz tmp.x */ - -- for (k = 1; k < components_read; ++k) -- { -- ins = &instructions->elements[i + 1 + k]; -- if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_OR, 1, 2))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins = 
&instructions->elements[pos + 1 + components_read]; -+ if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_DISCARD, 0, 1))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; - -- vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -- ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->dst[0].reg.idx[0].offset = tmp_idx; -- ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; -- -- vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -- ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->src[0].reg.idx[0].offset = tmp_idx; -- ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -- vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -- ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->src[1].reg.idx[0].offset = tmp_idx; -- ins->src[1].swizzle = vkd3d_shader_create_swizzle(k, k, k, k); -- } -+ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].reg.idx[0].offset = *tmp_idx; -+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - -- /* discard_nz tmp.x */ -+ /* Make the original instruction no-op */ -+ vkd3d_shader_instruction_make_nop(texkill); - -- ins = &instructions->elements[i + 1 + components_read]; -- if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_DISCARD, 0, 1))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -- ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; -+ return VKD3D_OK; -+} - -- vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -- ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->src[0].reg.idx[0].offset = tmp_idx; -- ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+/* The Shader Model 5 Assembly documentation states: "If components of a mad -+ * instruction are tagged as precise, the hardware must execute a mad instruction -+ * or the exact equivalent, and it cannot split it into a multiply followed by an add." -+ * But DXIL.rst states the opposite: "Floating point multiply & add. This operation is -+ * not fused for "precise" operations." -+ * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. 
*/ -+static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *program, -+ struct vkd3d_shader_instruction *mad, unsigned int *tmp_idx) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ struct vkd3d_shader_instruction *mul_ins, *add_ins; -+ size_t pos = mad - instructions->elements; -+ struct vkd3d_shader_dst_param *mul_dst; -+ -+ if (!(mad->flags & VKD3DSI_PRECISE_XYZW)) -+ return VKD3D_OK; -+ -+ if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (*tmp_idx == ~0u) -+ *tmp_idx = program->temp_count++; -+ -+ mul_ins = &instructions->elements[pos]; -+ add_ins = &instructions->elements[pos + 1]; -+ -+ mul_ins->handler_idx = VKD3DSIH_MUL; -+ mul_ins->src_count = 2; -+ -+ if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ add_ins->flags = mul_ins->flags & VKD3DSI_PRECISE_XYZW; -+ -+ mul_dst = mul_ins->dst; -+ *add_ins->dst = *mul_dst; -+ -+ mul_dst->modifiers = 0; -+ vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, mul_ins->src[0].reg.data_type, 1); -+ mul_dst->reg.dimension = add_ins->dst->reg.dimension; -+ mul_dst->reg.idx[0].offset = *tmp_idx; -+ -+ add_ins->src[0].reg = mul_dst->reg; -+ add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask); -+ add_ins->src[0].modifiers = 0; -+ add_ins->src[1] = mul_ins->src[2]; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ unsigned int tmp_idx = ~0u, i; -+ enum vkd3d_result ret; -+ -+ for (i = 0; i < instructions->count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &instructions->elements[i]; -+ -+ switch (ins->handler_idx) -+ { -+ case VKD3DSIH_TEXKILL: -+ if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0) -+ return ret; -+ break; -+ -+ case VKD3DSIH_MAD: -+ if ((ret = vsir_program_lower_precise_mad(program, ins, &tmp_idx)) < 0) -+ return ret; -+ break; -+ -+ case VKD3DSIH_DCL_CONSTANT_BUFFER: -+ case VKD3DSIH_DCL_TEMPS: -+ vkd3d_shader_instruction_make_nop(ins); -+ break; - -- /* Make the original instruction no-op */ -- vkd3d_shader_instruction_make_nop(texkill_ins); -+ default: -+ break; -+ } - } - - return VKD3D_OK; -@@ -227,10 +297,11 @@ static const struct vkd3d_shader_varying_map *find_varying_map( - return NULL; - } - --static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_compile_info *compile_info) -+static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) - { -- struct shader_signature *signature = &parser->shader_desc.output_signature; -+ const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; -+ struct shader_signature *signature = &program->output_signature; - const struct vkd3d_shader_varying_map_info *varying_map; - unsigned int i; - -@@ -252,7 +323,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars - * location with a different mask. 
*/ - if (input_mask && input_mask != e->mask) - { -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " - "Output mask %#x does not match input mask %#x.", - e->mask, input_mask); -@@ -269,7 +340,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars - { - if (varying_map->varying_map[i].output_signature_index >= signature->element_count) - { -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " - "The next stage consumes varyings not written by this stage."); - return VKD3D_ERROR_NOT_IMPLEMENTED; -@@ -453,7 +524,7 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader - - void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) - { -- vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UINT, 1); -+ vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); - param->reg.dimension = VSIR_DIMENSION_NONE; - param->reg.idx[0].offset = label_id; - } -@@ -464,12 +535,24 @@ static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned - src->reg.idx[0].offset = idx; - } - -+static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -+ src->reg.idx[0].offset = idx; -+} -+ - static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) - { - vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); - dst->reg.idx[0].offset = idx; - } - -+static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+{ -+ vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -+ dst->reg.idx[0].offset = idx; -+} -+ - static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) - { - vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -@@ -554,11 +637,14 @@ static bool control_point_normaliser_is_in_control_point_phase(const struct cont - return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; - } - --static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( -+struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( - struct vkd3d_shader_instruction_array *instructions) - { - struct vkd3d_shader_src_param *rel_addr; - -+ if (instructions->outpointid_param) -+ return instructions->outpointid_param; -+ - if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) - return NULL; - -@@ -566,6 +652,7 @@ static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( - rel_addr->swizzle = 0; - rel_addr->modifiers = 0; - -+ instructions->outpointid_param = rel_addr; - return rel_addr; - } - -@@ -1383,10 +1470,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - } - } - --static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parser *parser) -+static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program) - { -- struct io_normaliser normaliser = {parser->program.instructions}; -- struct vsir_program *program = &parser->program; -+ struct io_normaliser normaliser = {program->instructions}; - struct 
vkd3d_shader_instruction *ins; - bool has_control_point_phase; - unsigned int i, j; -@@ -1394,9 +1480,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse - normaliser.phase = VKD3DSIH_INVALID; - normaliser.shader_type = program->shader_version.type; - normaliser.major = program->shader_version.major; -- normaliser.input_signature = &parser->shader_desc.input_signature; -- normaliser.output_signature = &parser->shader_desc.output_signature; -- normaliser.patch_constant_signature = &parser->shader_desc.patch_constant_signature; -+ normaliser.input_signature = &program->input_signature; -+ normaliser.output_signature = &program->output_signature; -+ normaliser.patch_constant_signature = &program->patch_constant_signature; - - for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i) - { -@@ -1439,9 +1525,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse - } - } - -- if (!shader_signature_merge(&parser->shader_desc.input_signature, normaliser.input_range_map, false) -- || !shader_signature_merge(&parser->shader_desc.output_signature, normaliser.output_range_map, false) -- || !shader_signature_merge(&parser->shader_desc.patch_constant_signature, normaliser.pc_range_map, true)) -+ if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) -+ || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) -+ || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) - { - program->instructions = normaliser.instructions; - return VKD3D_ERROR_OUT_OF_MEMORY; -@@ -1668,19 +1754,20 @@ static void remove_dead_code(struct vsir_program *program) - } - } - --static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser *parser) -+static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) - { - unsigned int i; - -- for (i = 0; i < parser->program.instructions.count; ++i) -+ for (i = 0; i < program->instructions.count; ++i) - { -- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - struct vkd3d_shader_src_param *srcs; - - switch (ins->handler_idx) - { - case VKD3DSIH_TEX: -- if (!(srcs = shader_src_param_allocator_get(&parser->program.instructions.src_params, 3))) -+ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) - return VKD3D_ERROR_OUT_OF_MEMORY; - memset(srcs, 0, sizeof(*srcs) * 3); - -@@ -1723,7 +1810,7 @@ static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser - case VKD3DSIH_TEXREG2AR: - case VKD3DSIH_TEXREG2GB: - case VKD3DSIH_TEXREG2RGB: -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " - "Combined sampler instruction %#x.", ins->handler_idx); - return VKD3D_ERROR_NOT_IMPLEMENTED; -@@ -1789,10 +1876,10 @@ struct cf_flattener_info - - struct cf_flattener - { -- struct vkd3d_shader_parser *parser; -+ struct vsir_program *program; - - struct vkd3d_shader_location location; -- bool allocation_failed; -+ enum vkd3d_result status; - - struct vkd3d_shader_instruction *instructions; - size_t instruction_capacity; -@@ -1812,13 +1899,20 @@ struct cf_flattener - size_t 
control_flow_info_size; - }; - -+static void cf_flattener_set_error(struct cf_flattener *flattener, enum vkd3d_result error) -+{ -+ if (flattener->status != VKD3D_OK) -+ return; -+ flattener->status = error; -+} -+ - static struct vkd3d_shader_instruction *cf_flattener_require_space(struct cf_flattener *flattener, size_t count) - { - if (!vkd3d_array_reserve((void **)&flattener->instructions, &flattener->instruction_capacity, - flattener->instruction_count + count, sizeof(*flattener->instructions))) - { - ERR("Failed to allocate instructions.\n"); -- flattener->allocation_failed = true; -+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); - return NULL; - } - return &flattener->instructions[flattener->instruction_count]; -@@ -1850,9 +1944,9 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ - { - struct vkd3d_shader_src_param *params; - -- if (!(params = vsir_program_get_src_params(&flattener->parser->program, count))) -+ if (!(params = vsir_program_get_src_params(flattener->program, count))) - { -- flattener->allocation_failed = true; -+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); - return NULL; - } - ins->src = params; -@@ -1866,10 +1960,10 @@ static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int - - if (!(ins = cf_flattener_require_space(flattener, 1))) - return; -- if (vsir_instruction_init_label(ins, &flattener->location, label_id, &flattener->parser->program)) -+ if (vsir_instruction_init_label(ins, &flattener->location, label_id, flattener->program)) - ++flattener->instruction_count; - else -- flattener->allocation_failed = true; -+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); - } - - /* For conditional branches, this returns the false target branch parameter. */ -@@ -1947,7 +2041,7 @@ static struct cf_flattener_info *cf_flattener_push_control_flow_level(struct cf_ - flattener->control_flow_depth + 1, sizeof(*flattener->control_flow_info))) - { - ERR("Failed to allocate control flow info structure.\n"); -- flattener->allocation_failed = true; -+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); - return NULL; - } - -@@ -2014,12 +2108,12 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla - flattener->block_names[block_id] = buffer.buffer; - } - --static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener) -+static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, -+ struct vkd3d_shader_message_context *message_context) - { - bool main_block_open, is_hull_shader, after_declarations_section; -- struct vkd3d_shader_parser *parser = flattener->parser; - struct vkd3d_shader_instruction_array *instructions; -- struct vsir_program *program = &parser->program; -+ struct vsir_program *program = flattener->program; - struct vkd3d_shader_instruction *dst_ins; - size_t i; - -@@ -2041,12 +2135,19 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - flattener->location = instruction->location; - - /* Declarations should occur before the first code block, which in hull shaders is marked by the first -- * phase instruction, and in all other shader types begins with the first label instruction. */ -- if (!after_declarations_section && !vsir_instruction_is_dcl(instruction) -- && instruction->handler_idx != VKD3DSIH_NOP) -+ * phase instruction, and in all other shader types begins with the first label instruction. 
-+ * Declaring an indexable temp with function scope is not considered a declaration, -+ * because it needs to live inside a function. */ -+ if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) - { -- after_declarations_section = true; -- cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); -+ bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP -+ && instruction->declaration.indexable_temp.has_function_scope; -+ -+ if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) -+ { -+ after_declarations_section = true; -+ cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); -+ } - } - - cf_info = flattener->control_flow_depth -@@ -2064,7 +2165,8 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - break; - - case VKD3DSIH_LABEL: -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ vkd3d_shader_error(message_context, &instruction->location, -+ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: Label instruction."); - return VKD3D_ERROR_NOT_IMPLEMENTED; - -@@ -2229,8 +2331,10 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) - { - WARN("Unexpected src swizzle %#x.\n", src->swizzle); -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, -+ vkd3d_shader_error(message_context, &instruction->location, -+ VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, - "The swizzle for a switch case value is not scalar X."); -+ cf_flattener_set_error(flattener, VKD3D_ERROR_INVALID_SHADER); - } - value = *src->reg.u.immconst_u32; - -@@ -2358,21 +2462,18 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - ++flattener->instruction_count; - } - -- return flattener->allocation_failed ? VKD3D_ERROR_OUT_OF_MEMORY : VKD3D_OK; -+ return flattener->status; - } - --static enum vkd3d_result flatten_control_flow_constructs(struct vkd3d_shader_parser *parser) -+static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) - { -- struct vsir_program *program = &parser->program; -- struct cf_flattener flattener = {0}; -+ struct cf_flattener flattener = {.program = program}; - enum vkd3d_result result; - -- flattener.parser = parser; -- result = cf_flattener_iterate_instruction_array(&flattener); -- -- if (result >= 0) -+ if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) - { -- vkd3d_free(parser->program.instructions.elements); -+ vkd3d_free(program->instructions.elements); - program->instructions.elements = flattener.instructions; - program->instructions.capacity = flattener.instruction_capacity; - program->instructions.count = flattener.instruction_count; -@@ -2548,97 +2649,6 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) - } - } - -- /* Second subpass: creating new blocks might have broken -- * references in PHI instructions, so we use the block map to fix -- * them. 
*/ -- current_label = 0; -- for (i = 0; i < ins_count; ++i) -- { -- struct vkd3d_shader_instruction *ins = &instructions[i]; -- struct vkd3d_shader_src_param *new_src; -- unsigned int j, l, new_src_count = 0; -- -- switch (ins->handler_idx) -- { -- case VKD3DSIH_LABEL: -- current_label = label_from_src_param(&ins->src[0]); -- continue; -- -- case VKD3DSIH_PHI: -- break; -- -- default: -- continue; -- } -- -- /* First count how many source parameters we need. */ -- for (j = 0; j < ins->src_count; j += 2) -- { -- unsigned int source_label = label_from_src_param(&ins->src[j + 1]); -- size_t k, match_count = 0; -- -- for (k = 0; k < map_count; ++k) -- { -- struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; -- -- if (mapping->switch_label == source_label && mapping->target_label == current_label) -- match_count += 1; -- } -- -- new_src_count += (match_count != 0) ? 2 * match_count : 2; -- } -- -- assert(new_src_count >= ins->src_count); -- -- /* Allocate more source parameters if needed. */ -- if (new_src_count == ins->src_count) -- { -- new_src = ins->src; -- } -- else -- { -- if (!(new_src = vsir_program_get_src_params(program, new_src_count))) -- { -- ERR("Failed to allocate %u source parameters.\n", new_src_count); -- goto fail; -- } -- } -- -- /* Then do the copy. */ -- for (j = 0, l = 0; j < ins->src_count; j += 2) -- { -- unsigned int source_label = label_from_src_param(&ins->src[j + 1]); -- size_t k, match_count = 0; -- -- for (k = 0; k < map_count; ++k) -- { -- struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k]; -- -- if (mapping->switch_label == source_label && mapping->target_label == current_label) -- { -- match_count += 1; -- -- new_src[l] = ins->src[j]; -- new_src[l + 1] = ins->src[j + 1]; -- new_src[l + 1].reg.idx[0].offset = mapping->if_label; -- l += 2; -- } -- } -- -- if (match_count == 0) -- { -- new_src[l] = ins->src[j]; -- new_src[l + 1] = ins->src[j + 1]; -- l += 2; -- } -- } -- -- assert(l == new_src_count); -- -- ins->src_count = new_src_count; -- ins->src = new_src; -- } -- - vkd3d_free(program->instructions.elements); - vkd3d_free(block_map); - program->instructions.elements = instructions; -@@ -2656,145 +2666,139 @@ fail: - return VKD3D_ERROR_OUT_OF_MEMORY; - } - --static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src); -+struct ssas_to_temps_alloc -+{ -+ unsigned int *table; -+ unsigned int next_temp_idx; -+}; - --/* This is idempotent: it can be safely applied more than once on the -- * same register. 
*/ --static void materialize_ssas_to_temps_process_reg(struct vkd3d_shader_parser *parser, struct vkd3d_shader_register *reg) -+static bool ssas_to_temps_alloc_init(struct ssas_to_temps_alloc *alloc, unsigned int ssa_count, unsigned int temp_count) - { -- unsigned int i; -+ size_t i = ssa_count * sizeof(*alloc->table); - -- if (reg->type == VKD3DSPR_SSA) -+ if (!(alloc->table = vkd3d_malloc(i))) - { -- reg->type = VKD3DSPR_TEMP; -- reg->idx[0].offset += parser->program.temp_count; -+ ERR("Failed to allocate SSA table.\n"); -+ return false; - } -+ memset(alloc->table, 0xff, i); - -- for (i = 0; i < reg->idx_count; ++i) -- if (reg->idx[i].rel_addr) -- materialize_ssas_to_temps_process_src_param(parser, reg->idx[i].rel_addr); --} -- --static void materialize_ssas_to_temps_process_dst_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_dst_param *dst) --{ -- materialize_ssas_to_temps_process_reg(parser, &dst->reg); --} -- --static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src) --{ -- materialize_ssas_to_temps_process_reg(parser, &src->reg); -+ alloc->next_temp_idx = temp_count; -+ return true; - } - --static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins, -- unsigned int label) -+/* This is idempotent: it can be safely applied more than once on the -+ * same register. */ -+static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc, -+ struct vkd3d_shader_register *reg) - { - unsigned int i; - -- assert(ins->handler_idx == VKD3DSIH_PHI); -- -- for (i = 0; i < ins->src_count; i += 2) -+ if (reg->type == VKD3DSPR_SSA && alloc->table[reg->idx[0].offset] != UINT_MAX) - { -- if (label_from_src_param(&ins->src[i + 1]) == label) -- return &ins->src[i]; -+ reg->type = VKD3DSPR_TEMP; -+ reg->idx[0].offset = alloc->table[reg->idx[0].offset]; - } - -- vkd3d_unreachable(); -+ for (i = 0; i < reg->idx_count; ++i) -+ if (reg->idx[i].rel_addr) -+ materialize_ssas_to_temps_process_reg(program, alloc, ®->idx[i].rel_addr->reg); - } - --static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser *parser, -- struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc, -- const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond, -- const struct vkd3d_shader_src_param *source, bool invert) -+struct ssas_to_temps_block_info - { -- struct vkd3d_shader_src_param *src; -- struct vkd3d_shader_dst_param *dst; -- -- if (!vsir_instruction_init_with_params(&parser->program, instruction, loc, -- cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 
3 : 1)) -- return false; -- -- dst = instruction->dst; -- src = instruction->src; -- -- dst[0] = *dest; -- materialize_ssas_to_temps_process_dst_param(parser, &dst[0]); -+ struct phi_incoming_to_temp -+ { -+ struct vkd3d_shader_src_param *src; -+ struct vkd3d_shader_dst_param *dst; -+ } *incomings; -+ size_t incoming_capacity; -+ size_t incoming_count; -+}; - -- assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0); -- assert(dst[0].modifiers == 0); -- assert(dst[0].shift == 0); -+static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *block_info, -+ size_t count) -+{ -+ size_t i; - -- if (cond) -- { -- src[0] = *cond; -- src[1 + invert] = *source; -- memset(&src[2 - invert], 0, sizeof(src[2 - invert])); -- src[2 - invert].reg = dst[0].reg; -- materialize_ssas_to_temps_process_src_param(parser, &src[1]); -- materialize_ssas_to_temps_process_src_param(parser, &src[2]); -- } -- else -- { -- src[0] = *source; -- materialize_ssas_to_temps_process_src_param(parser, &src[0]); -- } -+ for (i = 0; i < count; ++i) -+ vkd3d_free(block_info[i].incomings); - -- return true; -+ vkd3d_free(block_info); - } - --static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *parser) -+static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program) - { -+ size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; -+ struct ssas_to_temps_block_info *info, *block_info = NULL; - struct vkd3d_shader_instruction *instructions = NULL; -- struct materialize_ssas_to_temps_block_data -- { -- size_t phi_begin; -- size_t phi_count; -- } *block_index = NULL; -- size_t ins_capacity = 0, ins_count = 0, i; -+ struct ssas_to_temps_alloc alloc = {0}; - unsigned int current_label = 0; - -- if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) -- goto fail; -- -- if (!(block_index = vkd3d_calloc(parser->program.block_count, sizeof(*block_index)))) -+ if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) - { -- ERR("Failed to allocate block index.\n"); -+ ERR("Failed to allocate block info array.\n"); - goto fail; - } - -- for (i = 0; i < parser->program.instructions.count; ++i) -+ if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) -+ goto fail; -+ -+ for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i) - { -- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ unsigned int j, temp_idx; - -- switch (ins->handler_idx) -+ /* Only phi src/dst SSA values need be converted here. Structurisation may -+ * introduce new cases of undominated SSA use, which will be handled later. 
*/ -+ if (ins->handler_idx != VKD3DSIH_PHI) -+ continue; -+ ++phi_count; -+ -+ temp_idx = alloc.next_temp_idx++; -+ -+ for (j = 0; j < ins->src_count; j += 2) - { -- case VKD3DSIH_LABEL: -- current_label = label_from_src_param(&ins->src[0]); -- break; -+ struct phi_incoming_to_temp *incoming; -+ unsigned int label; - -- case VKD3DSIH_PHI: -- assert(current_label != 0); -- assert(i != 0); -- if (block_index[current_label - 1].phi_begin == 0) -- block_index[current_label - 1].phi_begin = i; -- block_index[current_label - 1].phi_count += 1; -- break; -+ label = label_from_src_param(&ins->src[j + 1]); -+ assert(label); - -- default: -- current_label = 0; -- break; -+ info = &block_info[label - 1]; -+ -+ if (!(vkd3d_array_reserve((void **)&info->incomings, &info->incoming_capacity, info->incoming_count + 1, -+ sizeof(*info->incomings)))) -+ goto fail; -+ -+ incoming = &info->incomings[info->incoming_count++]; -+ incoming->src = &ins->src[j]; -+ incoming->dst = ins->dst; -+ -+ alloc.table[ins->dst->reg.idx[0].offset] = temp_idx; -+ -+ ++incoming_count; - } -+ -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg); - } - -- for (i = 0; i < parser->program.instructions.count; ++i) -+ if (!phi_count) -+ goto done; -+ -+ if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count)) -+ goto fail; -+ -+ for (i = 0; i < program->instructions.count; ++i) - { -- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; -+ struct vkd3d_shader_instruction *mov_ins, *ins = &program->instructions.elements[i]; - size_t j; - - for (j = 0; j < ins->dst_count; ++j) -- materialize_ssas_to_temps_process_dst_param(parser, &ins->dst[j]); -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); - - for (j = 0; j < ins->src_count; ++j) -- materialize_ssas_to_temps_process_src_param(parser, &ins->src[j]); -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); - - switch (ins->handler_idx) - { -@@ -2803,62 +2807,21 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p - break; - - case VKD3DSIH_BRANCH: -- { -- if (vsir_register_is_label(&ins->src[0].reg)) -- { -- const struct materialize_ssas_to_temps_block_data *data = &block_index[label_from_src_param(&ins->src[0]) - 1]; -- -- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + data->phi_count)) -- goto fail; -- -- for (j = data->phi_begin; j < data->phi_begin + data->phi_count; ++j) -- { -- const struct vkd3d_shader_src_param *source; -- -- source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); -- if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, -- &parser->program.instructions.elements[j].dst[0], NULL, source, false)) -- goto fail; -+ case VKD3DSIH_SWITCH_MONOLITHIC: -+ info = &block_info[current_label - 1]; - -- ++ins_count; -- } -- } -- else -+ for (j = 0; j < info->incoming_count; ++j) - { -- struct materialize_ssas_to_temps_block_data *data_true = &block_index[label_from_src_param(&ins->src[1]) - 1], -- *data_false = &block_index[label_from_src_param(&ins->src[2]) - 1]; -- const struct vkd3d_shader_src_param *cond = &ins->src[0]; -+ struct phi_incoming_to_temp *incoming = &info->incomings[j]; - -- if (!reserve_instructions(&instructions, &ins_capacity, -- ins_count + data_true->phi_count + data_false->phi_count)) -+ mov_ins = &instructions[ins_count++]; -+ if 
(!vsir_instruction_init_with_params(program, mov_ins, &ins->location, VKD3DSIH_MOV, 1, 0)) - goto fail; -- -- for (j = data_true->phi_begin; j < data_true->phi_begin + data_true->phi_count; ++j) -- { -- const struct vkd3d_shader_src_param *source; -- -- source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); -- if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, -- &parser->program.instructions.elements[j].dst[0], cond, source, false)) -- goto fail; -- -- ++ins_count; -- } -- -- for (j = data_false->phi_begin; j < data_false->phi_begin + data_false->phi_count; ++j) -- { -- const struct vkd3d_shader_src_param *source; -- -- source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); -- if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, -- &parser->program.instructions.elements[j].dst[0], cond, source, true)) -- goto fail; -- -- ++ins_count; -- } -+ *mov_ins->dst = *incoming->dst; -+ mov_ins->src = incoming->src; -+ mov_ins->src_count = 1; - } - break; -- } - - case VKD3DSIH_PHI: - continue; -@@ -2867,162 +2830,55 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p - break; - } - -- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) -- goto fail; -- - instructions[ins_count++] = *ins; - } - -- vkd3d_free(parser->program.instructions.elements); -- vkd3d_free(block_index); -- parser->program.instructions.elements = instructions; -- parser->program.instructions.capacity = ins_capacity; -- parser->program.instructions.count = ins_count; -- parser->program.temp_count += parser->program.ssa_count; -- parser->program.ssa_count = 0; -+ vkd3d_free(program->instructions.elements); -+ program->instructions.elements = instructions; -+ program->instructions.capacity = ins_capacity; -+ program->instructions.count = ins_count; -+ program->temp_count = alloc.next_temp_idx; -+done: -+ ssas_to_temps_block_info_cleanup(block_info, program->block_count); -+ vkd3d_free(alloc.table); - - return VKD3D_OK; - - fail: - vkd3d_free(instructions); -- vkd3d_free(block_index); -+ ssas_to_temps_block_info_cleanup(block_info, program->block_count); -+ vkd3d_free(alloc.table); - - return VKD3D_ERROR_OUT_OF_MEMORY; - } - --static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *parser) -+struct vsir_block_list - { -- const unsigned int block_temp_idx = parser->program.temp_count; -- struct vkd3d_shader_instruction *instructions = NULL; -- const struct vkd3d_shader_location no_loc = {0}; -- size_t ins_capacity = 0, ins_count = 0, i; -- bool first_label_found = false; -+ struct vsir_block **blocks; -+ size_t count, capacity; -+}; - -- if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) -- goto fail; -+static void vsir_block_list_init(struct vsir_block_list *list) -+{ -+ memset(list, 0, sizeof(*list)); -+} -+ -+static void vsir_block_list_cleanup(struct vsir_block_list *list) -+{ -+ vkd3d_free(list->blocks); -+} - -- for (i = 0; i < parser->program.instructions.count; ++i) -+static enum vkd3d_result vsir_block_list_add_checked(struct vsir_block_list *list, struct vsir_block *block) -+{ -+ if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) - { -- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; -+ ERR("Cannot extend block list.\n"); -+ 
return VKD3D_ERROR_OUT_OF_MEMORY; -+ } - -- switch (ins->handler_idx) -- { -- case VKD3DSIH_PHI: -- case VKD3DSIH_SWITCH_MONOLITHIC: -- vkd3d_unreachable(); -- -- case VKD3DSIH_LABEL: -- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 4)) -- goto fail; -- -- if (!first_label_found) -- { -- first_label_found = true; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) -- goto fail; -- dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); -- src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); -- ins_count++; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_LOOP, 0, 0)) -- goto fail; -- ins_count++; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_SWITCH, 0, 1)) -- goto fail; -- src_param_init_temp_uint(&instructions[ins_count].src[0], block_temp_idx); -- ins_count++; -- } -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_CASE, 0, 1)) -- goto fail; -- src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); -- ins_count++; -- break; -- -- case VKD3DSIH_BRANCH: -- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 2)) -- goto fail; -- -- if (vsir_register_is_label(&ins->src[0].reg)) -- { -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) -- goto fail; -- dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); -- src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); -- ins_count++; -- } -- else -- { -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOVC, 1, 3)) -- goto fail; -- dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); -- instructions[ins_count].src[0] = ins->src[0]; -- src_param_init_const_uint(&instructions[ins_count].src[1], label_from_src_param(&ins->src[1])); -- src_param_init_const_uint(&instructions[ins_count].src[2], label_from_src_param(&ins->src[2])); -- ins_count++; -- } -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_BREAK, 0, 0)) -- goto fail; -- ins_count++; -- break; -- -- case VKD3DSIH_RET: -- default: -- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) -- goto fail; -- -- instructions[ins_count++] = *ins; -- break; -- } -- } -- -- assert(first_label_found); -- -- if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3)) -- goto fail; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDSWITCH, 0, 0)) -- goto fail; -- ins_count++; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDLOOP, 0, 0)) -- goto fail; -- ins_count++; -- -- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_RET, 0, 0)) -- goto fail; -- ins_count++; -- -- vkd3d_free(parser->program.instructions.elements); -- parser->program.instructions.elements = instructions; -- parser->program.instructions.capacity = ins_capacity; -- parser->program.instructions.count = ins_count; -- parser->program.temp_count += 1; -+ list->blocks[list->count++] = block; - - return VKD3D_OK; -- 
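
The vsir_program_materialise_phi_ssas_to_temps pass above eliminates PHI instructions by allocating one fresh temporary per PHI and having every predecessor block copy its incoming value into that temporary immediately before its terminator; reads of the PHI's SSA result are then redirected to the temporary through the remap table. Below is a minimal standalone sketch of that lowering, assuming simplified toy_* types and names that are illustrative only and not part of vkd3d.

    /* Illustrative only: a PHI "t = phi(v0 from b0, v1 from b1, ...)" is
     * lowered to one copy per incoming edge, all writing the same
     * temporary register. */
    #include <stddef.h>

    struct toy_phi_source
    {
        unsigned int value;      /* SSA id of the incoming value */
        unsigned int pred_label; /* label of the predecessor block providing it */
    };

    struct toy_copy
    {
        unsigned int pred_label; /* block that must emit the copy before branching */
        unsigned int from_value; /* SSA id to read */
        unsigned int to_temp;    /* temporary register to write */
    };

    /* Record one copy per incoming edge; returns the number of copies
     * written, at most max_copies. */
    static size_t toy_lower_phi(const struct toy_phi_source *sources, size_t source_count,
            unsigned int temp, struct toy_copy *copies, size_t max_copies)
    {
        size_t i, count = 0;

        for (i = 0; i < source_count && count < max_copies; ++i)
        {
            copies[count].pred_label = sources[i].pred_label;
            copies[count].from_value = sources[i].value;
            copies[count].to_temp = temp;
            ++count;
        }
        return count;
    }

Turning these values into temporaries sidesteps SSA dominance requirements before structurisation starts duplicating and reordering control flow, which is also why the pass only converts PHI sources and destinations and leaves other SSA values alone.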
--fail: -- vkd3d_free(instructions); -- return VKD3D_ERROR_OUT_OF_MEMORY; --} -- --struct vsir_block_list --{ -- struct vsir_block **blocks; -- size_t count, capacity; --}; -- --static void vsir_block_list_init(struct vsir_block_list *list) --{ -- memset(list, 0, sizeof(*list)); --} -- --static void vsir_block_list_cleanup(struct vsir_block_list *list) --{ -- vkd3d_free(list->blocks); - } - - static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) -@@ -3031,22 +2887,21 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc - - for (i = 0; i < list->count; ++i) - if (block == list->blocks[i]) -- return VKD3D_OK; -- -- if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) -- { -- ERR("Cannot extend block list.\n"); -- return VKD3D_ERROR_OUT_OF_MEMORY; -- } -+ return VKD3D_FALSE; - -- list->blocks[list->count++] = block; -+ return vsir_block_list_add_checked(list, block); -+} - -- return VKD3D_OK; -+/* It is guaranteed that the relative order is kept. */ -+static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t idx) -+{ -+ --list->count; -+ memmove(&list->blocks[idx], &list->blocks[idx + 1], (list->count - idx) * sizeof(*list->blocks)); - } - - struct vsir_block - { -- unsigned int label; -+ unsigned int label, order_pos; - /* `begin' points to the instruction immediately following the - * LABEL that introduces the block. `end' points to the terminator - * instruction (either BRANCH or RET). They can coincide, meaning -@@ -3063,8 +2918,7 @@ static enum vkd3d_result vsir_block_init(struct vsir_block *block, unsigned int - if (block_count > SIZE_MAX - (sizeof(*block->dominates) * CHAR_BIT - 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; - -- block_count = align(block_count, sizeof(*block->dominates) * CHAR_BIT); -- byte_count = block_count / CHAR_BIT; -+ byte_count = VKD3D_BITMAP_SIZE(block_count) * sizeof(*block->dominates); - - assert(label); - memset(block, 0, sizeof(*block)); -@@ -3089,12 +2943,211 @@ static void vsir_block_cleanup(struct vsir_block *block) - vkd3d_free(block->dominates); - } - -+static int block_compare(const void *ptr1, const void *ptr2) -+{ -+ const struct vsir_block *block1 = *(const struct vsir_block **)ptr1; -+ const struct vsir_block *block2 = *(const struct vsir_block **)ptr2; -+ -+ return vkd3d_u32_compare(block1->label, block2->label); -+} -+ -+static void vsir_block_list_sort(struct vsir_block_list *list) -+{ -+ qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare); -+} -+ -+static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_block *block) -+{ -+ return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare); -+} -+ -+struct vsir_cfg_structure_list -+{ -+ struct vsir_cfg_structure *structures; -+ size_t count, capacity; -+ unsigned int end; -+}; -+ -+struct vsir_cfg_structure -+{ -+ enum vsir_cfg_structure_type -+ { -+ /* Execute a block of the original VSIR program. */ -+ STRUCTURE_TYPE_BLOCK, -+ /* Execute a loop, which is identified by an index. */ -+ STRUCTURE_TYPE_LOOP, -+ /* Execute a selection construct. */ -+ STRUCTURE_TYPE_SELECTION, -+ /* Execute a `return' or a (possibly) multilevel `break' or -+ * `continue', targeting a loop by its index. If `condition' -+ * is non-NULL, then the jump is conditional (this is -+ * currently not allowed for `return'). 
*/ -+ STRUCTURE_TYPE_JUMP, -+ } type; -+ union -+ { -+ struct vsir_block *block; -+ struct vsir_cfg_structure_loop -+ { -+ struct vsir_cfg_structure_list body; -+ unsigned idx; -+ bool needs_trampoline; -+ struct vsir_cfg_structure *outer_loop; -+ } loop; -+ struct vsir_cfg_structure_selection -+ { -+ struct vkd3d_shader_src_param *condition; -+ struct vsir_cfg_structure_list if_body; -+ struct vsir_cfg_structure_list else_body; -+ bool invert_condition; -+ } selection; -+ struct vsir_cfg_structure_jump -+ { -+ enum vsir_cfg_jump_type -+ { -+ /* NONE is available as an intermediate value, but it -+ * is not allowed in valid structured programs. */ -+ JUMP_NONE, -+ JUMP_BREAK, -+ JUMP_CONTINUE, -+ JUMP_RET, -+ } type; -+ unsigned int target; -+ struct vkd3d_shader_src_param *condition; -+ bool invert_condition; -+ bool needs_launcher; -+ } jump; -+ } u; -+}; -+ -+static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type); -+static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure); -+ -+static void vsir_cfg_structure_list_cleanup(struct vsir_cfg_structure_list *list) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < list->count; ++i) -+ vsir_cfg_structure_cleanup(&list->structures[i]); -+ vkd3d_free(list->structures); -+} -+ -+static struct vsir_cfg_structure *vsir_cfg_structure_list_append(struct vsir_cfg_structure_list *list, -+ enum vsir_cfg_structure_type type) -+{ -+ struct vsir_cfg_structure *ret; -+ -+ if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + 1, -+ sizeof(*list->structures))) -+ return NULL; -+ -+ ret = &list->structures[list->count++]; -+ -+ vsir_cfg_structure_init(ret, type); -+ -+ return ret; -+} -+ -+static enum vkd3d_result vsir_cfg_structure_list_append_from_region(struct vsir_cfg_structure_list *list, -+ struct vsir_cfg_structure *begin, size_t size) -+{ -+ if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + size, -+ sizeof(*list->structures))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ memcpy(&list->structures[list->count], begin, size * sizeof(*begin)); -+ -+ list->count += size; -+ -+ return VKD3D_OK; -+} -+ -+static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type) -+{ -+ memset(structure, 0, sizeof(*structure)); -+ structure->type = type; -+} -+ -+static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) -+{ -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_LOOP: -+ vsir_cfg_structure_list_cleanup(&structure->u.loop.body); -+ break; -+ -+ case STRUCTURE_TYPE_SELECTION: -+ vsir_cfg_structure_list_cleanup(&structure->u.selection.if_body); -+ vsir_cfg_structure_list_cleanup(&structure->u.selection.else_body); -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+struct vsir_cfg_emit_target -+{ -+ struct vkd3d_shader_instruction *instructions; -+ size_t ins_capacity, ins_count; -+ unsigned int jump_target_temp_idx; -+ unsigned int temp_count; -+}; -+ - struct vsir_cfg - { -+ struct vkd3d_shader_message_context *message_context; - struct vsir_program *program; -+ size_t function_begin; -+ size_t function_end; - struct vsir_block *blocks; - struct vsir_block *entry; - size_t block_count; -+ struct vkd3d_string_buffer debug_buffer; -+ -+ struct vsir_block_list *loops; -+ size_t loops_count, loops_capacity; -+ size_t *loops_by_header; -+ -+ struct vsir_block_list order; -+ struct cfg_loop_interval -+ { -+ /* `begin' is the position of the first block of the loop in -+ * 
the topological sort; `end' is the position of the first -+ * block after the loop. In other words, `begin' is where a -+ * `continue' instruction would jump and `end' is where a -+ * `break' instruction would jump. */ -+ unsigned int begin, end; -+ /* Each loop interval can be natural or synthetic. Natural -+ * intervals are added to represent loops given by CFG back -+ * edges. Synthetic intervals do not correspond to loops in -+ * the input CFG, but are added to leverage their `break' -+ * instruction in order to execute forward edges. -+ * -+ * For a synthetic loop interval it's not really important -+ * which one is the `begin' block, since we don't need to -+ * execute `continue' for them. So we have some leeway for -+ * moving it provided that these conditions are met: 1. the -+ * interval must contain all `break' instructions that target -+ * it, which in practice means that `begin' can be moved -+ * backward and not forward; 2. intervals must remain properly -+ * nested (for each pair of intervals, either one contains the -+ * other or they are disjoint). -+ * -+ * Subject to these conditions, we try to reuse the same loop -+ * as much as possible (if many forward edges target the same -+ * block), but we still try to keep `begin' as forward as -+ * possible, to keep the loop scope as small as possible. */ -+ bool synthetic; -+ /* The number of jump instructions (both conditional and -+ * unconditional) that target this loop. */ -+ unsigned int target_count; -+ } *loop_intervals; -+ size_t loop_interval_count, loop_interval_capacity; -+ -+ struct vsir_cfg_structure_list structured_program; -+ -+ struct vsir_cfg_emit_target *target; - }; - - static void vsir_cfg_cleanup(struct vsir_cfg *cfg) -@@ -3104,7 +3157,44 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) - for (i = 0; i < cfg->block_count; ++i) - vsir_block_cleanup(&cfg->blocks[i]); - -+ for (i = 0; i < cfg->loops_count; ++i) -+ vsir_block_list_cleanup(&cfg->loops[i]); -+ -+ vsir_block_list_cleanup(&cfg->order); -+ -+ vsir_cfg_structure_list_cleanup(&cfg->structured_program); -+ - vkd3d_free(cfg->blocks); -+ vkd3d_free(cfg->loops); -+ vkd3d_free(cfg->loops_by_header); -+ vkd3d_free(cfg->loop_intervals); -+ -+ if (TRACE_ON()) -+ vkd3d_string_buffer_cleanup(&cfg->debug_buffer); -+} -+ -+static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, -+ unsigned int end, bool synthetic) -+{ -+ struct cfg_loop_interval *interval; -+ -+ if (!vkd3d_array_reserve((void **)&cfg->loop_intervals, &cfg->loop_interval_capacity, -+ cfg->loop_interval_count + 1, sizeof(*cfg->loop_intervals))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ interval = &cfg->loop_intervals[cfg->loop_interval_count++]; -+ -+ interval->begin = begin; -+ interval->end = end; -+ interval->synthetic = synthetic; -+ interval->target_count = 0; -+ -+ return VKD3D_OK; -+} -+ -+static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2) -+{ -+ return bitmap_is_set(b1->dominates, b2->label - 1); - } - - static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_block *block, -@@ -3153,260 +3243,1917 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) - vkd3d_unreachable(); - } - -- TRACE(" n%u [label=\"%u\", shape=\"%s\"];\n", block->label, block->label, shape); -+ TRACE(" n%u [label=\"%u\", shape=\"%s\"];\n", block->label, block->label, shape); -+ -+ for (j = 0; j < block->successors.count; ++j) -+ TRACE(" n%u -> n%u;\n", block->label, block->successors.blocks[j]->label); -+ } -+ -+ TRACE("}\n"); 
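
Per the comment on struct cfg_loop_interval above, `begin' is the order position a `continue' targeting the interval jumps to and `end' is the position a `break' jumps to. The standalone sketch below shows how an edge between two positions in the block order is classified under that scheme, mirroring the logic of vsir_cfg_compute_edge_action further down; it assumes the intervals have already been sorted by ascending begin and descending end, and the toy_* names are illustrative rather than vkd3d API.

    #include <stddef.h>
    #include <stdint.h>

    enum toy_jump { TOY_JUMP_NONE, TOY_JUMP_BREAK, TOY_JUMP_CONTINUE };

    struct toy_interval
    {
        unsigned int begin, end; /* half-open range [begin, end) of order positions */
    };

    /* Classify the edge from order position `from' to order position `to'
     * against intervals sorted by ascending begin, then descending end. */
    static enum toy_jump toy_classify_edge(const struct toy_interval *intervals, size_t count,
            unsigned int from, unsigned int to, size_t *target)
    {
        size_t i;

        *target = SIZE_MAX;

        if (to <= from)
        {
            /* Backward edge: `continue' to the innermost interval that
             * contains `from' and starts exactly at `to'.  With the sort
             * order above, the last match is the innermost one; in a
             * reducible CFG a match always exists. */
            for (i = 0; i < count; ++i)
                if (intervals[i].begin == to && from < intervals[i].end)
                    *target = i;
            return TOY_JUMP_CONTINUE;
        }

        /* Forward edge: `break' out of the outermost interval that contains
         * `from' and ends exactly at `to'. */
        for (i = 0; i < count; ++i)
        {
            if (intervals[i].begin <= from && intervals[i].end == to)
            {
                *target = i;
                return TOY_JUMP_BREAK;
            }
        }

        /* No interval matched: the only way to reach `to' without a jump is
         * falling through, so `to' must be the next block in the order. */
        return TOY_JUMP_NONE;
    }

A forward edge with no matching interval can only target the block immediately following `from' in the order, so it needs no jump at all; synthetic intervals are generated later precisely so that every other forward edge finds an interval to break out of.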
-+} -+ -+static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list); -+ -+static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure *structure) -+{ -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_BLOCK: -+ TRACE("%sblock %u\n", cfg->debug_buffer.buffer, structure->u.block->label); -+ break; -+ -+ case STRUCTURE_TYPE_LOOP: -+ TRACE("%s%u : loop {\n", cfg->debug_buffer.buffer, structure->u.loop.idx); -+ -+ vsir_cfg_structure_list_dump(cfg, &structure->u.loop.body); -+ -+ TRACE("%s} # %u%s\n", cfg->debug_buffer.buffer, structure->u.loop.idx, -+ structure->u.loop.needs_trampoline ? ", tramp" : ""); -+ break; -+ -+ case STRUCTURE_TYPE_SELECTION: -+ TRACE("%sif {\n", cfg->debug_buffer.buffer); -+ -+ vsir_cfg_structure_list_dump(cfg, &structure->u.selection.if_body); -+ -+ if (structure->u.selection.else_body.count == 0) -+ { -+ TRACE("%s}\n", cfg->debug_buffer.buffer); -+ } -+ else -+ { -+ TRACE("%s} else {\n", cfg->debug_buffer.buffer); -+ -+ vsir_cfg_structure_list_dump(cfg, &structure->u.selection.else_body); -+ -+ TRACE("%s}\n", cfg->debug_buffer.buffer); -+ } -+ break; -+ -+ case STRUCTURE_TYPE_JUMP: -+ { -+ const char *type_str; -+ -+ switch (structure->u.jump.type) -+ { -+ case JUMP_RET: -+ TRACE("%sret\n", cfg->debug_buffer.buffer); -+ return; -+ -+ case JUMP_BREAK: -+ type_str = "break"; -+ break; -+ -+ case JUMP_CONTINUE: -+ type_str = "continue"; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ TRACE("%s%s%s %u%s\n", cfg->debug_buffer.buffer, type_str, -+ structure->u.jump.condition ? "c" : "", structure->u.jump.target, -+ structure->u.jump.needs_launcher ? " # launch" : ""); -+ break; -+ } -+ -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) -+{ -+ unsigned int i; -+ -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, " "); -+ -+ for (i = 0; i < list->count; ++i) -+ vsir_cfg_structure_dump(cfg, &list->structures[i]); -+ -+ vkd3d_string_buffer_truncate(&cfg->debug_buffer, cfg->debug_buffer.content_size - 2); -+} -+ -+static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < cfg->structured_program.count; ++i) -+ vsir_cfg_structure_dump(cfg, &cfg->structured_program.structures[i]); -+} -+ -+static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target, -+ size_t *pos) -+{ -+ struct vsir_block *current_block = NULL; -+ enum vkd3d_result ret; -+ size_t i; -+ -+ memset(cfg, 0, sizeof(*cfg)); -+ cfg->message_context = message_context; -+ cfg->program = program; -+ cfg->block_count = program->block_count; -+ cfg->target = target; -+ cfg->function_begin = *pos; -+ -+ vsir_block_list_init(&cfg->order); -+ -+ if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (TRACE_ON()) -+ vkd3d_string_buffer_init(&cfg->debug_buffer); -+ -+ for (i = *pos; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; -+ bool finish = false; -+ -+ switch (instruction->handler_idx) -+ { -+ case VKD3DSIH_PHI: -+ case VKD3DSIH_SWITCH_MONOLITHIC: -+ vkd3d_unreachable(); -+ -+ case VKD3DSIH_LABEL: -+ { -+ unsigned int label = label_from_src_param(&instruction->src[0]); -+ -+ assert(!current_block); -+ assert(label > 0); -+ 
assert(label <= cfg->block_count); -+ current_block = &cfg->blocks[label - 1]; -+ assert(current_block->label == 0); -+ if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0) -+ goto fail; -+ current_block->begin = &program->instructions.elements[i + 1]; -+ if (!cfg->entry) -+ cfg->entry = current_block; -+ break; -+ } -+ -+ case VKD3DSIH_BRANCH: -+ case VKD3DSIH_RET: -+ assert(current_block); -+ current_block->end = instruction; -+ current_block = NULL; -+ break; -+ -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ assert(!current_block); -+ finish = true; -+ break; -+ -+ default: -+ break; -+ } -+ -+ if (finish) -+ break; -+ } -+ -+ *pos = i; -+ cfg->function_end = *pos; -+ -+ for (i = 0; i < cfg->block_count; ++i) -+ { -+ struct vsir_block *block = &cfg->blocks[i]; -+ -+ if (block->label == 0) -+ continue; -+ -+ switch (block->end->handler_idx) -+ { -+ case VKD3DSIH_RET: -+ break; -+ -+ case VKD3DSIH_BRANCH: -+ if (vsir_register_is_label(&block->end->src[0].reg)) -+ { -+ if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[0])) < 0) -+ goto fail; -+ } -+ else -+ { -+ if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[1])) < 0) -+ goto fail; -+ -+ if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[2])) < 0) -+ goto fail; -+ } -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ } -+ -+ if (TRACE_ON()) -+ vsir_cfg_dump_dot(cfg); -+ -+ return VKD3D_OK; -+ -+fail: -+ vsir_cfg_cleanup(cfg); -+ -+ return ret; -+} -+ -+/* Block A dominates block B if every path from the entry point to B -+ * must pass through A. Naively compute the set of blocks that are -+ * dominated by `reference' by running a graph visit starting from the -+ * entry point (which must be the initial value of `current') and -+ * avoiding `reference'. Running this for all the blocks takes -+ * quadratic time: if in the future something better is sought after, -+ * the standard tool seems to be the Lengauer-Tarjan algorithm. */ -+static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, struct vsir_block *reference) -+{ -+ size_t i; -+ -+ assert(current->label != 0); -+ -+ if (current == reference) -+ return; -+ -+ if (!bitmap_is_set(reference->dominates, current->label - 1)) -+ return; -+ -+ bitmap_clear(reference->dominates, current->label - 1); -+ -+ for (i = 0; i < current->successors.count; ++i) -+ vsir_cfg_compute_dominators_recurse(current->successors.blocks[i], reference); -+} -+ -+static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) -+{ -+ size_t i, j; -+ -+ for (i = 0; i < cfg->block_count; ++i) -+ { -+ struct vsir_block *block = &cfg->blocks[i]; -+ -+ if (block->label == 0) -+ continue; -+ -+ vsir_cfg_compute_dominators_recurse(cfg->entry, block); -+ -+ if (TRACE_ON()) -+ { -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block %u dominates:", block->label); -+ for (j = 0; j < cfg->block_count; j++) -+ { -+ struct vsir_block *block2 = &cfg->blocks[j]; -+ -+ if (block2->label == 0) -+ continue; -+ -+ if (vsir_block_dominates(block, block2)) -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); -+ } -+ TRACE("%s\n", cfg->debug_buffer.buffer); -+ vkd3d_string_buffer_clear(&cfg->debug_buffer); -+ } -+ } -+} -+ -+/* A back edge is an edge X -> Y for which block Y dominates block -+ * X. All the other edges are forward edges, and it is required that -+ * the input CFG is reducible, i.e., it is acyclic once you strip away -+ * the back edges. 
-+ * -+ * Each back edge X -> Y defines a loop: block X is the header block, -+ * block Y is the back edge block, and the loop consists of all the -+ * blocks which are dominated by the header block and have a path to -+ * the back edge block that doesn't pass through the header block -+ * (including the header block itself). It can be proved that all the -+ * blocks in such a path (connecting a loop block to the back edge -+ * block without passing through the header block) belong to the same -+ * loop. -+ * -+ * If the input CFG is reducible its loops are properly nested (i.e., -+ * each two loops are either disjoint or one is contained in the -+ * other), provided that each block has at most one incoming back -+ * edge. If this condition does not hold, a synthetic block can be -+ * introduced as the only back edge block for the given header block, -+ * with all the previous back edge now being forward edges to the -+ * synthetic block. This is not currently implemented (but it is -+ * rarely found in practice anyway). */ -+static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct vsir_block *block, -+ struct vsir_block *header) -+{ -+ enum vkd3d_result ret; -+ size_t i; -+ -+ if ((ret = vsir_block_list_add(loop, block)) < 0) -+ return ret; -+ -+ if (ret == VKD3D_FALSE || block == header) -+ return VKD3D_OK; -+ -+ for (i = 0; i < block->predecessors.count; ++i) -+ { -+ if ((ret = vsir_cfg_scan_loop(loop, block->predecessors.blocks[i], header)) < 0) -+ return ret; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) -+{ -+ size_t i, j, k; -+ -+ if (!(cfg->loops_by_header = vkd3d_calloc(cfg->block_count, sizeof(*cfg->loops_by_header)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ memset(cfg->loops_by_header, 0xff, cfg->block_count * sizeof(*cfg->loops_by_header)); -+ -+ for (i = 0; i < cfg->block_count; ++i) -+ { -+ struct vsir_block *block = &cfg->blocks[i]; -+ -+ if (block->label == 0) -+ continue; -+ -+ for (j = 0; j < block->successors.count; ++j) -+ { -+ struct vsir_block *header = block->successors.blocks[j]; -+ struct vsir_block_list *loop; -+ enum vkd3d_result ret; -+ -+ /* Is this a back edge? 
*/ -+ if (!vsir_block_dominates(header, block)) -+ continue; -+ -+ if (!vkd3d_array_reserve((void **)&cfg->loops, &cfg->loops_capacity, cfg->loops_count + 1, sizeof(*cfg->loops))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ loop = &cfg->loops[cfg->loops_count]; -+ vsir_block_list_init(loop); -+ -+ if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) -+ return ret; -+ -+ vsir_block_list_sort(loop); -+ -+ if (TRACE_ON()) -+ { -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label); -+ -+ for (k = 0; k < loop->count; ++k) -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", loop->blocks[k]->label); -+ -+ TRACE("%s\n", cfg->debug_buffer.buffer); -+ vkd3d_string_buffer_clear(&cfg->debug_buffer); -+ } -+ -+ if (cfg->loops_by_header[header->label - 1] != SIZE_MAX) -+ { -+ FIXME("Block %u is header to more than one loop, this is not implemented.\n", header->label); -+ vkd3d_shader_error(cfg->message_context, &header->begin->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Block %u is header to more than one loop, this is not implemented.", header->label); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ -+ cfg->loops_by_header[header->label - 1] = cfg->loops_count; -+ -+ ++cfg->loops_count; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+struct vsir_cfg_node_sorter -+{ -+ struct vsir_cfg *cfg; -+ struct vsir_cfg_node_sorter_stack_item -+ { -+ struct vsir_block_list *loop; -+ unsigned int seen_count; -+ unsigned int begin; -+ } *stack; -+ size_t stack_count, stack_capacity; -+ struct vsir_block_list available_blocks; -+}; -+ -+/* Topologically sort the blocks according to the forward edges. By -+ * definition if the input CFG is reducible then its forward edges -+ * form a DAG, so a topological sorting exists. In order to compute it -+ * we keep an array with the incoming degree for each block and an -+ * available list of all the blocks whose incoming degree has reached -+ * zero. At each step we pick a block from the available list and -+ * strip it away from the graph, updating the incoming degrees and -+ * available list. -+ * -+ * In principle at each step we can pick whatever node we want from -+ * the available list, and will get a topological sort -+ * anyway. However, we use these two criteria to give to the computed -+ * order additional properties: -+ * -+ * 1. we keep track of which loops we're into, and pick blocks -+ * belonging to the current innermost loop, so that loops are kept -+ * contiguous in the order; this can always be done when the input -+ * CFG is reducible; -+ * -+ * 2. subject to the requirement above, we always pick the most -+ * recently added block to the available list, because this tends -+ * to keep related blocks and require fewer control flow -+ * primitives. -+ */ -+static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) -+{ -+ struct vsir_cfg_node_sorter sorter = { .cfg = cfg }; -+ unsigned int *in_degrees = NULL; -+ enum vkd3d_result ret; -+ size_t i; -+ -+ if (!(in_degrees = vkd3d_calloc(cfg->block_count, sizeof(*in_degrees)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ for (i = 0; i < cfg->block_count; ++i) -+ { -+ struct vsir_block *block = &cfg->blocks[i]; -+ -+ if (block->label == 0) -+ { -+ in_degrees[i] = UINT_MAX; -+ continue; -+ } -+ -+ in_degrees[i] = block->predecessors.count; -+ -+ /* Do not count back edges. 
*/ -+ if (cfg->loops_by_header[i] != SIZE_MAX) -+ { -+ assert(in_degrees[i] > 0); -+ in_degrees[i] -= 1; -+ } -+ -+ if (in_degrees[i] == 0 && block != cfg->entry) -+ { -+ WARN("Unexpected entry point %u.\n", block->label); -+ vkd3d_shader_error(cfg->message_context, &block->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Block %u is unreachable from the entry point.", block->label); -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; -+ } -+ } -+ -+ if (in_degrees[cfg->entry->label - 1] != 0) -+ { -+ WARN("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]); -+ vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "The entry point block has %u incoming forward edges.", in_degrees[cfg->entry->label - 1]); -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; -+ } -+ -+ vsir_block_list_init(&sorter.available_blocks); -+ -+ if ((ret = vsir_block_list_add_checked(&sorter.available_blocks, cfg->entry)) < 0) -+ goto fail; -+ -+ while (sorter.available_blocks.count != 0) -+ { -+ struct vsir_cfg_node_sorter_stack_item *inner_stack_item = NULL; -+ struct vsir_block *block; -+ size_t new_seen_count; -+ -+ if (sorter.stack_count != 0) -+ inner_stack_item = &sorter.stack[sorter.stack_count - 1]; -+ -+ for (i = sorter.available_blocks.count - 1; ; --i) -+ { -+ if (i == SIZE_MAX) -+ { -+ ERR("Couldn't find any viable next block, is the input CFG reducible?\n"); -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; -+ } -+ -+ block = sorter.available_blocks.blocks[i]; -+ -+ if (!inner_stack_item || vsir_block_list_search(inner_stack_item->loop, block)) -+ break; -+ } -+ -+ /* If the node is a loop header, open the loop. */ -+ if (sorter.cfg->loops_by_header[block->label - 1] != SIZE_MAX) -+ { -+ struct vsir_block_list *loop = &sorter.cfg->loops[sorter.cfg->loops_by_header[block->label - 1]]; -+ -+ if (loop) -+ { -+ if (!vkd3d_array_reserve((void **)&sorter.stack, &sorter.stack_capacity, -+ sorter.stack_count + 1, sizeof(*sorter.stack))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ inner_stack_item = &sorter.stack[sorter.stack_count++]; -+ inner_stack_item->loop = loop; -+ inner_stack_item->seen_count = 0; -+ inner_stack_item->begin = sorter.cfg->order.count; -+ } -+ } -+ -+ vsir_block_list_remove_index(&sorter.available_blocks, i); -+ block->order_pos = cfg->order.count; -+ if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) -+ goto fail; -+ -+ /* Close loops: since each loop is a strict subset of any -+ * outer loop, we just need to track how many blocks we've -+ * seen; when I close a loop I mark the same number of seen -+ * blocks for the next outer loop. */ -+ new_seen_count = 1; -+ while (sorter.stack_count != 0) -+ { -+ inner_stack_item = &sorter.stack[sorter.stack_count - 1]; -+ -+ inner_stack_item->seen_count += new_seen_count; -+ -+ assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); -+ if (inner_stack_item->seen_count != inner_stack_item->loop->count) -+ break; -+ -+ if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->begin, -+ cfg->order.count, false)) < 0) -+ goto fail; -+ -+ new_seen_count = inner_stack_item->loop->count; -+ --sorter.stack_count; -+ } -+ -+ /* Remove (forward) edges and make new nodes available. 
*/ -+ for (i = 0; i < block->successors.count; ++i) -+ { -+ struct vsir_block *successor = block->successors.blocks[i]; -+ -+ if (vsir_block_dominates(successor, block)) -+ continue; -+ -+ assert(in_degrees[successor->label - 1] > 0); -+ --in_degrees[successor->label - 1]; -+ -+ if (in_degrees[successor->label - 1] == 0) -+ { -+ if ((ret = vsir_block_list_add_checked(&sorter.available_blocks, successor)) < 0) -+ goto fail; -+ } -+ } -+ } -+ -+ if (cfg->order.count != cfg->block_count) -+ { -+ /* There is a cycle of forward edges. */ -+ WARN("The control flow graph is not reducible.\n"); -+ vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "The control flow graph is not reducible."); -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; -+ } -+ -+ assert(sorter.stack_count == 0); -+ -+ vkd3d_free(in_degrees); -+ vkd3d_free(sorter.stack); -+ vsir_block_list_cleanup(&sorter.available_blocks); -+ -+ if (TRACE_ON()) -+ { -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:"); -+ -+ for (i = 0; i < cfg->order.count; ++i) -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label); -+ -+ TRACE("%s\n", cfg->debug_buffer.buffer); -+ vkd3d_string_buffer_clear(&cfg->debug_buffer); -+ } -+ -+ return VKD3D_OK; -+ -+fail: -+ vkd3d_free(in_degrees); -+ vkd3d_free(sorter.stack); -+ vsir_block_list_cleanup(&sorter.available_blocks); -+ -+ return ret; -+} -+ -+/* Sort loop intervals first by ascending begin time and then by -+ * descending end time, so that inner intervals appear after outer -+ * ones and disjoint intervals appear in their proper order. */ -+static int compare_loop_intervals(const void *ptr1, const void *ptr2) -+{ -+ const struct cfg_loop_interval *interval1 = ptr1; -+ const struct cfg_loop_interval *interval2 = ptr2; -+ -+ if (interval1->begin != interval2->begin) -+ return vkd3d_u32_compare(interval1->begin, interval2->begin); -+ -+ return -vkd3d_u32_compare(interval1->end, interval2->end); -+} -+ -+static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) -+{ -+ enum vkd3d_result ret; -+ size_t i, j, k; -+ -+ for (i = 0; i < cfg->block_count; ++i) -+ { -+ struct vsir_block *block = &cfg->blocks[i]; -+ -+ if (block->label == 0) -+ continue; -+ -+ for (j = 0; j < block->successors.count; ++j) -+ { -+ struct vsir_block *successor = block->successors.blocks[j]; -+ struct cfg_loop_interval *extend = NULL; -+ unsigned int begin; -+ enum -+ { -+ ACTION_DO_NOTHING, -+ ACTION_CREATE_NEW, -+ ACTION_EXTEND, -+ } action = ACTION_CREATE_NEW; -+ -+ /* We've already contructed loop intervals for the back -+ * edges, there's nothing more to do. */ -+ if (vsir_block_dominates(successor, block)) -+ continue; -+ -+ assert(block->order_pos < successor->order_pos); -+ -+ /* Jumping from a block to the following one is always -+ * possible, so nothing to do. */ -+ if (block->order_pos + 1 == successor->order_pos) -+ continue; -+ -+ /* Let's look for a loop interval that already breaks at -+ * `successor' and either contains or can be extended to -+ * contain `block'. 
*/ -+ for (k = 0; k < cfg->loop_interval_count; ++k) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; -+ -+ if (interval->end != successor->order_pos) -+ continue; -+ -+ if (interval->begin <= block->order_pos) -+ { -+ action = ACTION_DO_NOTHING; -+ break; -+ } -+ -+ if (interval->synthetic) -+ { -+ action = ACTION_EXTEND; -+ extend = interval; -+ break; -+ } -+ } -+ -+ if (action == ACTION_DO_NOTHING) -+ continue; -+ -+ /* Ok, we have to decide where the new or replacing -+ * interval has to begin. These are the rules: 1. it must -+ * begin before `block'; 2. intervals must be properly -+ * nested; 3. the new interval should begin as late as -+ * possible, to limit control flow depth and extension. */ -+ begin = block->order_pos; -+ -+ /* Our candidate interval is always [begin, -+ * successor->order_pos), and we move `begin' backward -+ * until the candidate interval contains all the intervals -+ * whose endpoint lies in the candidate interval -+ * itself. */ -+ for (k = 0; k < cfg->loop_interval_count; ++k) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; -+ -+ if (begin < interval->end && interval->end < successor->order_pos) -+ begin = min(begin, interval->begin); -+ } -+ -+ /* New we have to care about the intervals whose begin -+ * point lies in the candidate interval. We cannot move -+ * the candidate interval endpoint, because it is -+ * important that the loop break target matches -+ * `successor'. So we have to move that interval's begin -+ * point to the begin point of the candidate interval, -+ * i.e. `begin'. But what if the interval we should extend -+ * backward is not synthetic? This cannot happen, -+ * fortunately, because it would mean that there is a jump -+ * entering a loop via a block which is not the loop -+ * header, so the CFG would not be reducible. */ -+ for (k = 0; k < cfg->loop_interval_count; ++k) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; -+ -+ if (interval->begin < successor->order_pos && successor->order_pos < interval->end) -+ { -+ if (interval->synthetic) -+ interval->begin = min(begin, interval->begin); -+ assert(begin >= interval->begin); -+ } -+ } -+ -+ if (action == ACTION_EXTEND) -+ extend->begin = begin; -+ else if ((ret = vsir_cfg_add_loop_interval(cfg, begin, successor->order_pos, true)) < 0) -+ return ret; -+ } -+ } -+ -+ qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); -+ -+ if (TRACE_ON()) -+ for (i = 0; i < cfg->loop_interval_count; ++i) -+ TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ? "Synthetic" : "Natural", -+ cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); -+ -+ return VKD3D_OK; -+} -+ -+struct vsir_cfg_edge_action -+{ -+ enum vsir_cfg_jump_type jump_type; -+ unsigned int target; -+ struct vsir_block *successor; -+}; -+ -+static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block *block, -+ struct vsir_block *successor, struct vsir_cfg_edge_action *action) -+{ -+ unsigned int i; -+ -+ action->target = UINT_MAX; -+ action->successor = successor; -+ -+ if (successor->order_pos <= block->order_pos) -+ { -+ /* The successor is before the current block, so we have to -+ * use `continue'. The target loop is the innermost that -+ * contains the current block and has the successor as -+ * `continue' target. 
*/ -+ for (i = 0; i < cfg->loop_interval_count; ++i) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; -+ -+ if (interval->begin == successor->order_pos && block->order_pos < interval->end) -+ action->target = i; -+ -+ if (interval->begin > successor->order_pos) -+ break; -+ } -+ -+ assert(action->target != UINT_MAX); -+ action->jump_type = JUMP_CONTINUE; -+ } -+ else -+ { -+ /* The successor is after the current block, so we have to use -+ * `break', or possibly just jump to the following block. The -+ * target loop is the outermost that contains the current -+ * block and has the successor as `break' target. */ -+ for (i = 0; i < cfg->loop_interval_count; ++i) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; -+ -+ if (interval->begin <= block->order_pos && interval->end == successor->order_pos) -+ { -+ action->target = i; -+ break; -+ } -+ } -+ -+ if (action->target == UINT_MAX) -+ { -+ assert(successor->order_pos == block->order_pos + 1); -+ action->jump_type = JUMP_NONE; -+ } -+ else -+ { -+ action->jump_type = JUMP_BREAK; -+ } -+ } -+} -+ -+static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) -+{ -+ unsigned int i, stack_depth = 1, open_interval_idx = 0; -+ struct vsir_cfg_structure_list **stack = NULL; -+ -+ /* It's enough to allocate up to the maximum interval stacking -+ * depth (plus one for the full program), but this is simpler. */ -+ if (!(stack = vkd3d_calloc(cfg->loop_interval_count + 1, sizeof(*stack)))) -+ goto fail; -+ cfg->structured_program.end = cfg->order.count; -+ stack[0] = &cfg->structured_program; -+ -+ for (i = 0; i < cfg->order.count; ++i) -+ { -+ struct vsir_block *block = cfg->order.blocks[i]; -+ struct vsir_cfg_structure *structure; -+ -+ assert(stack_depth > 0); -+ -+ /* Open loop intervals. */ -+ while (open_interval_idx < cfg->loop_interval_count) -+ { -+ struct cfg_loop_interval *interval = &cfg->loop_intervals[open_interval_idx]; -+ -+ if (interval->begin != i) -+ break; -+ -+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_LOOP))) -+ goto fail; -+ structure->u.loop.idx = open_interval_idx++; -+ -+ structure->u.loop.body.end = interval->end; -+ stack[stack_depth++] = &structure->u.loop.body; -+ } -+ -+ /* Execute the block. */ -+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_BLOCK))) -+ goto fail; -+ structure->u.block = block; -+ -+ /* Generate between zero and two jump instructions. 
*/ -+ switch (block->end->handler_idx) -+ { -+ case VKD3DSIH_BRANCH: -+ { -+ struct vsir_cfg_edge_action action_true, action_false; -+ bool invert_condition = false; -+ -+ if (vsir_register_is_label(&block->end->src[0].reg)) -+ { -+ unsigned int target = label_from_src_param(&block->end->src[0]); -+ struct vsir_block *successor = &cfg->blocks[target - 1]; -+ -+ vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); -+ action_false = action_true; -+ } -+ else -+ { -+ unsigned int target = label_from_src_param(&block->end->src[1]); -+ struct vsir_block *successor = &cfg->blocks[target - 1]; -+ -+ vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); -+ -+ target = label_from_src_param(&block->end->src[2]); -+ successor = &cfg->blocks[target - 1]; -+ -+ vsir_cfg_compute_edge_action(cfg, block, successor, &action_false); -+ } -+ -+ /* This will happen if the branch is unconditional, -+ * but also if it's conditional with the same target -+ * in both branches, which can happen in some corner -+ * cases, e.g. when converting switch instructions to -+ * selection ladders. */ -+ if (action_true.successor == action_false.successor) -+ { -+ assert(action_true.jump_type == action_false.jump_type); -+ } -+ else -+ { -+ /* At most one branch can just fall through to the -+ * next block, in which case we make sure it's the -+ * false branch. */ -+ if (action_true.jump_type == JUMP_NONE) -+ { -+ invert_condition = true; -+ } -+ else if (stack_depth >= 2) -+ { -+ struct vsir_cfg_structure_list *inner_loop_frame = stack[stack_depth - 2]; -+ struct vsir_cfg_structure *inner_loop = &inner_loop_frame->structures[inner_loop_frame->count - 1]; -+ -+ assert(inner_loop->type == STRUCTURE_TYPE_LOOP); -+ -+ /* Otherwise, if one of the branches is -+ * continueing the inner loop we're inside, -+ * make sure it's the false branch (because it -+ * will be optimized out later). */ -+ if (action_true.jump_type == JUMP_CONTINUE && action_true.target == inner_loop->u.loop.idx) -+ invert_condition = true; -+ } -+ -+ if (invert_condition) -+ { -+ struct vsir_cfg_edge_action tmp = action_true; -+ action_true = action_false; -+ action_false = tmp; -+ } -+ -+ assert(action_true.jump_type != JUMP_NONE); -+ -+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) -+ goto fail; -+ structure->u.jump.type = action_true.jump_type; -+ structure->u.jump.target = action_true.target; -+ structure->u.jump.condition = &block->end->src[0]; -+ structure->u.jump.invert_condition = invert_condition; -+ } -+ -+ if (action_false.jump_type != JUMP_NONE) -+ { -+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) -+ goto fail; -+ structure->u.jump.type = action_false.jump_type; -+ structure->u.jump.target = action_false.target; -+ } -+ break; -+ } -+ -+ case VKD3DSIH_RET: -+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) -+ goto fail; -+ structure->u.jump.type = JUMP_RET; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ /* Close loop intervals. 
*/ -+ while (stack_depth > 0) -+ { -+ if (stack[stack_depth - 1]->end != i + 1) -+ break; -+ -+ --stack_depth; -+ } -+ } -+ -+ assert(stack_depth == 0); -+ assert(open_interval_idx == cfg->loop_interval_count); -+ -+ if (TRACE_ON()) -+ vsir_cfg_dump_structured_program(cfg); -+ -+ vkd3d_free(stack); -+ -+ return VKD3D_OK; -+ -+fail: -+ vkd3d_free(stack); -+ -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+} -+ -+static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list, unsigned int target) -+{ -+ struct vsir_cfg_structure *last = &list->structures[list->count - 1]; -+ -+ if (last->type == STRUCTURE_TYPE_JUMP && last->u.jump.type == JUMP_CONTINUE -+ && !last->u.jump.condition && last->u.jump.target == target) -+ { -+ --list->count; -+ assert(cfg->loop_intervals[target].target_count > 0); -+ --cfg->loop_intervals[target].target_count; -+ } -+} -+ -+static struct vsir_cfg_structure *vsir_cfg_get_trailing_break(struct vsir_cfg_structure_list *list) -+{ -+ struct vsir_cfg_structure *structure; -+ size_t count = list->count; -+ -+ if (count == 0) -+ return NULL; -+ -+ structure = &list->structures[count - 1]; -+ -+ if (structure->type != STRUCTURE_TYPE_JUMP || structure->u.jump.type != JUMP_BREAK -+ || structure->u.jump.condition) -+ return NULL; -+ -+ return structure; -+} -+ -+/* When the last instruction in both branches of a selection construct -+ * is an unconditional break, any of them can be moved after the -+ * selection construct. If they break the same loop both of them can -+ * be moved out, otherwise we can choose which one: we choose the one -+ * that breaks the innermost loop, because we hope to eventually -+ * remove the loop itself. -+ * -+ * In principle a similar movement could be done when the last -+ * instructions are continue and continue, or continue and break. But -+ * in practice I don't think those situations can happen given the -+ * previous passes we do on the program, so we don't care. */ -+static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list) -+{ -+ struct vsir_cfg_structure *selection, *if_break, *else_break, *new_break; -+ unsigned int if_target, else_target, max_target; -+ size_t pos = list->count - 1; -+ -+ selection = &list->structures[pos]; -+ assert(selection->type == STRUCTURE_TYPE_SELECTION); -+ -+ if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body); -+ else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body); -+ -+ if (!if_break || !else_break) -+ return VKD3D_OK; -+ -+ if_target = if_break->u.jump.target; -+ else_target = else_break->u.jump.target; -+ max_target = max(if_target, else_target); -+ -+ if (!(new_break = vsir_cfg_structure_list_append(list, STRUCTURE_TYPE_JUMP))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ new_break->u.jump.type = JUMP_BREAK; -+ new_break->u.jump.target = max_target; -+ ++cfg->loop_intervals[max_target].target_count; -+ -+ /* Pointer `selection' could have been invalidated by the append -+ * operation. 
*/ -+ selection = &list->structures[pos]; -+ assert(selection->type == STRUCTURE_TYPE_SELECTION); -+ -+ if (if_target == max_target) -+ { -+ --selection->u.selection.if_body.count; -+ assert(cfg->loop_intervals[if_target].target_count > 0); -+ --cfg->loop_intervals[if_target].target_count; -+ } -+ -+ if (else_target == max_target) -+ { -+ --selection->u.selection.else_body.count; -+ assert(cfg->loop_intervals[else_target].target_count > 0); -+ --cfg->loop_intervals[else_target].target_count; -+ } -+ -+ /* If a branch becomes empty, make it the else branch, so we save a block. */ -+ if (selection->u.selection.if_body.count == 0) -+ { -+ struct vsir_cfg_structure_list tmp; -+ -+ selection->u.selection.invert_condition = !selection->u.selection.invert_condition; -+ tmp = selection->u.selection.if_body; -+ selection->u.selection.if_body = selection->u.selection.else_body; -+ selection->u.selection.else_body = tmp; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections_recursively(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list) -+{ -+ struct vsir_cfg_structure *trailing; -+ -+ if (list->count == 0) -+ return VKD3D_OK; -+ -+ trailing = &list->structures[list->count - 1]; -+ -+ if (trailing->type != STRUCTURE_TYPE_SELECTION) -+ return VKD3D_OK; -+ -+ vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.if_body); -+ vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.else_body); -+ -+ return vsir_cfg_move_breaks_out_of_selections(cfg, list); -+} -+ -+static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list) -+{ -+ enum vkd3d_result ret; -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump; -+ -+ if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition) -+ continue; -+ -+ vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION); -+ new_selection.u.selection.condition = structure->u.jump.condition; -+ new_selection.u.selection.invert_condition = structure->u.jump.invert_condition; -+ -+ if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body, -+ STRUCTURE_TYPE_JUMP))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ new_jump->u.jump.type = structure->u.jump.type; -+ new_jump->u.jump.target = structure->u.jump.target; -+ -+ /* Move the rest of the structure list in the else branch -+ * rather than leaving it after the selection construct. The -+ * reason is that this is more conducive to further -+ * optimization, because all the conditional `break's appear -+ * as the last instruction of a branch of a cascade of -+ * selection constructs at the end of the structure list we're -+ * processing, instead of being buried in the middle of the -+ * structure list itself. 
*/ -+ if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body, -+ &list->structures[i + 1], list->count - i - 1)) < 0) -+ return ret; -+ -+ *structure = new_selection; -+ list->count = i + 1; -+ -+ if ((ret = vsir_cfg_synthesize_selections(cfg, &structure->u.selection.else_body)) < 0) -+ return ret; -+ -+ if ((ret = vsir_cfg_move_breaks_out_of_selections(cfg, list)) < 0) -+ return ret; -+ -+ break; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *new_list, struct vsir_cfg_structure *loop) -+{ -+ struct vsir_cfg_structure_list *loop_body = &loop->u.loop.body; -+ unsigned int target, loop_idx = loop->u.loop.idx; -+ struct vsir_cfg_structure *trailing_break; -+ enum vkd3d_result ret; -+ -+ trailing_break = vsir_cfg_get_trailing_break(loop_body); -+ -+ /* If the loop's last instruction is not a break, we cannot remove -+ * the loop itself. */ -+ if (!trailing_break) -+ { -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) -+ return ret; -+ memset(loop, 0, sizeof(*loop)); -+ return VKD3D_OK; -+ } -+ -+ target = trailing_break->u.jump.target; -+ assert(cfg->loop_intervals[target].target_count > 0); -+ -+ /* If the loop is not targeted by any jump, we can remove it. The -+ * trailing `break' then targets another loop, so we have to keep -+ * it. */ -+ if (cfg->loop_intervals[loop_idx].target_count == 0) -+ { -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, -+ &loop_body->structures[0], loop_body->count)) < 0) -+ return ret; -+ loop_body->count = 0; -+ return VKD3D_OK; -+ } -+ -+ /* If the loop is targeted only by its own trailing `break' -+ * instruction, then we can remove it together with the `break' -+ * itself. 
*/ -+ if (target == loop_idx && cfg->loop_intervals[loop_idx].target_count == 1) -+ { -+ --cfg->loop_intervals[loop_idx].target_count; -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, -+ &loop_body->structures[0], loop_body->count - 1)) < 0) -+ return ret; -+ loop_body->count = 0; -+ return VKD3D_OK; -+ } -+ -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) -+ return ret; -+ memset(loop, 0, sizeof(*loop)); -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) -+{ -+ struct vsir_cfg_structure_list old_list = *list, *new_list = list; -+ enum vkd3d_result ret; -+ size_t i; -+ -+ memset(new_list, 0, sizeof(*new_list)); -+ -+ for (i = 0; i < old_list.count; ++i) -+ { -+ struct vsir_cfg_structure *loop = &old_list.structures[i], *selection; -+ struct vsir_cfg_structure_list *loop_body; -+ -+ if (loop->type != STRUCTURE_TYPE_LOOP) -+ { -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) -+ goto out; -+ memset(loop, 0, sizeof(*loop)); -+ continue; -+ } -+ -+ loop_body = &loop->u.loop.body; -+ -+ if (loop_body->count == 0) -+ { -+ if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0) -+ goto out; -+ memset(loop, 0, sizeof(*loop)); -+ continue; -+ } -+ -+ vsir_cfg_remove_trailing_continue(cfg, loop_body, loop->u.loop.idx); -+ -+ if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0) -+ goto out; -+ -+ if ((ret = vsir_cfg_synthesize_selections(cfg, loop_body)) < 0) -+ goto out; -+ -+ if ((ret = vsir_cfg_append_loop(cfg, new_list, loop)) < 0) -+ goto out; -+ -+ /* If the last pushed instruction is a selection and one of the branches terminates with a -+ * `break', start pushing to the other branch, in the hope of eventually push a `break' -+ * there too and be able to remove a loop. */ -+ if (new_list->count == 0) -+ continue; -+ -+ selection = &new_list->structures[new_list->count - 1]; -+ -+ if (selection->type == STRUCTURE_TYPE_SELECTION) -+ { -+ if (vsir_cfg_get_trailing_break(&selection->u.selection.if_body)) -+ new_list = &selection->u.selection.else_body; -+ else if (vsir_cfg_get_trailing_break(&selection->u.selection.else_body)) -+ new_list = &selection->u.selection.if_body; -+ } -+ } -+ -+ ret = vsir_cfg_move_breaks_out_of_selections_recursively(cfg, list); -+ -+out: -+ vsir_cfg_structure_list_cleanup(&old_list); -+ -+ return ret; -+} -+ -+static void vsir_cfg_count_targets(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) -+{ -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i]; -+ -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_BLOCK: -+ break; -+ -+ case STRUCTURE_TYPE_LOOP: -+ vsir_cfg_count_targets(cfg, &structure->u.loop.body); -+ break; -+ -+ case STRUCTURE_TYPE_SELECTION: -+ vsir_cfg_count_targets(cfg, &structure->u.selection.if_body); -+ vsir_cfg_count_targets(cfg, &structure->u.selection.else_body); -+ break; -+ -+ case STRUCTURE_TYPE_JUMP: -+ if (structure->u.jump.type == JUMP_BREAK || structure->u.jump.type == JUMP_CONTINUE) -+ ++cfg->loop_intervals[structure->u.jump.target].target_count; -+ break; -+ } -+ } -+} -+ -+/* Trampolines are code gadgets used to emulate multilevel jumps (which are not natively supported -+ * by SPIR-V). 
A trampoline is inserted just after a loop and checks whether control has reached the -+ * intended site (i.e., we just jumped out of the target block) or if other levels of jumping are -+ * needed. For each jump a trampoline is required for all the loops between the jump itself and the -+ * target loop, excluding the target loop itself. */ -+static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, -+ struct vsir_cfg_structure *loop) -+{ -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i]; -+ -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_BLOCK: -+ break; -+ -+ case STRUCTURE_TYPE_LOOP: -+ structure->u.loop.outer_loop = loop; -+ vsir_cfg_mark_trampolines(cfg, &structure->u.loop.body, structure); -+ break; -+ -+ case STRUCTURE_TYPE_SELECTION: -+ vsir_cfg_mark_trampolines(cfg, &structure->u.selection.if_body, loop); -+ vsir_cfg_mark_trampolines(cfg, &structure->u.selection.else_body, loop); -+ break; -+ -+ case STRUCTURE_TYPE_JUMP: -+ { -+ struct vsir_cfg_structure *l; -+ if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) -+ break; -+ for (l = loop; l && l->u.loop.idx != structure->u.jump.target; l = l->u.loop.outer_loop) -+ { -+ assert(l->type == STRUCTURE_TYPE_LOOP); -+ l->u.loop.needs_trampoline = true; -+ } -+ break; -+ } -+ } -+ } -+} -+ -+/* Launchers are the counterpart of trampolines. A launcher is inserted just before a jump, and -+ * writes in a well-known variable what is the target of the jump. Trampolines will then read that -+ * variable to decide how to redirect the jump to its intended target. A launcher is needed each -+ * time the innermost loop containing the jump itself has a trampoline (independently of whether the -+ * jump is targeting that loop or not). */ -+static void vsir_cfg_mark_launchers(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list, -+ struct vsir_cfg_structure *loop) -+{ -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i]; -+ -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_BLOCK: -+ break; -+ -+ case STRUCTURE_TYPE_LOOP: -+ vsir_cfg_mark_launchers(cfg, &structure->u.loop.body, structure); -+ break; -+ -+ case STRUCTURE_TYPE_SELECTION: -+ vsir_cfg_mark_launchers(cfg, &structure->u.selection.if_body, loop); -+ vsir_cfg_mark_launchers(cfg, &structure->u.selection.else_body, loop); -+ break; -+ -+ case STRUCTURE_TYPE_JUMP: -+ if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) -+ break; -+ assert(loop && loop->type == STRUCTURE_TYPE_LOOP); -+ if (loop->u.loop.needs_trampoline) -+ structure->u.jump.needs_launcher = true; -+ break; -+ } -+ } -+} -+ -+static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg) -+{ -+ enum vkd3d_result ret; -+ -+ vsir_cfg_count_targets(cfg, &cfg->structured_program); -+ -+ ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program); -+ -+ /* Trampolines and launchers cannot be marked with the same pass, -+ * because a jump might have to be marked as launcher even when it -+ * targets its innermost loop, if other jumps in the same loop -+ * need a trampoline anyway. So launchers can be discovered only -+ * once all the trampolines are known. 
*/ -+ vsir_cfg_mark_trampolines(cfg, &cfg->structured_program, NULL); -+ vsir_cfg_mark_launchers(cfg, &cfg->structured_program, NULL); -+ -+ if (TRACE_ON()) -+ vsir_cfg_dump_structured_program(cfg); -+ -+ return ret; -+} -+ -+static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list, unsigned int loop_idx); -+ -+static enum vkd3d_result vsir_cfg_structure_list_emit_block(struct vsir_cfg *cfg, -+ struct vsir_block *block) -+{ -+ struct vsir_cfg_emit_target *target = cfg->target; -+ -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, -+ target->ins_count + (block->end - block->begin))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ memcpy(&target->instructions[target->ins_count], block->begin, -+ (char *)block->end - (char *)block->begin); -+ -+ target->ins_count += block->end - block->begin; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_loop *loop, unsigned int loop_idx) -+{ -+ struct vsir_cfg_emit_target *target = cfg->target; -+ const struct vkd3d_shader_location no_loc = {0}; -+ enum vkd3d_result ret; -+ -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_LOOP); -+ -+ if ((ret = vsir_cfg_structure_list_emit(cfg, &loop->body, loop->idx)) < 0) -+ return ret; -+ -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 5)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); -+ -+ /* Add a trampoline to implement multilevel jumping depending on the stored -+ * jump_target value. */ -+ if (loop->needs_trampoline) -+ { -+ /* If the multilevel jump is a `continue' and the target is the loop we're inside -+ * right now, then we can finally do the `continue'. */ -+ const unsigned int outer_continue_target = loop_idx << 1 | 1; -+ /* If the multilevel jump is a `continue' to any other target, or if it is a `break' -+ * and the target is not the loop we just finished emitting, then it means that -+ * we have to reach an outer loop, so we keep breaking. 
*/ -+ const unsigned int inner_break_target = loop->idx << 1; -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_IEQ, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); -+ src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); -+ src_param_init_const_uint(&target->instructions[target->ins_count].src[1], outer_continue_target); -+ -+ ++target->ins_count; -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); -+ -+ ++target->ins_count; -+ ++target->temp_count; -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_IEQ, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count); -+ src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx); -+ src_param_init_const_uint(&target->instructions[target->ins_count].src[1], inner_break_target); -+ -+ ++target->ins_count; -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_BREAKP, 0, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; -+ -+ src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count); -+ -+ ++target->ins_count; -+ ++target->temp_count; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_structure_list_emit_selection(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_selection *selection, unsigned int loop_idx) -+{ -+ struct vsir_cfg_emit_target *target = cfg->target; -+ const struct vkd3d_shader_location no_loc = {0}; -+ enum vkd3d_result ret; -+ -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_IF, 0, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ target->instructions[target->ins_count].src[0] = *selection->condition; -+ -+ if (selection->invert_condition) -+ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; -+ -+ ++target->ins_count; -+ -+ if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->if_body, loop_idx)) < 0) -+ return ret; -+ -+ if (selection->else_body.count != 0) -+ { -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ELSE); -+ -+ if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->else_body, loop_idx)) < 0) -+ return ret; -+ } -+ -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDIF); -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, -+ struct 
vsir_cfg_structure_jump *jump, unsigned int loop_idx) -+{ -+ struct vsir_cfg_emit_target *target = cfg->target; -+ const struct vkd3d_shader_location no_loc = {0}; -+ /* Encode the jump target as the loop index plus a bit to remember whether -+ * we're breaking or continueing. */ -+ unsigned int jump_target = jump->target << 1; -+ enum vkd3d_shader_opcode opcode; -+ -+ switch (jump->type) -+ { -+ case JUMP_CONTINUE: -+ /* If we're continueing the loop we're directly inside, then we can emit a -+ * `continue'. Otherwise we first have to break all the loops between here -+ * and the loop to continue, recording our intention to continue -+ * in the lowest bit of jump_target. */ -+ if (jump->target == loop_idx) -+ { -+ opcode = jump->condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; -+ break; -+ } -+ jump_target |= 1; -+ /* fall through */ -+ -+ case JUMP_BREAK: -+ opcode = jump->condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; -+ break; -+ -+ case JUMP_RET: -+ assert(!jump->condition); -+ opcode = VKD3DSIH_RET; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (jump->needs_launcher) -+ { -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, VKD3DSIH_MOV, 1, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ dst_param_init_temp_uint(&target->instructions[target->ins_count].dst[0], target->jump_target_temp_idx); -+ src_param_init_const_uint(&target->instructions[target->ins_count].src[0], jump_target); -+ -+ ++target->ins_count; -+ } -+ -+ if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count], -+ &no_loc, opcode, 0, !!jump->condition)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (jump->invert_condition) -+ target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; -+ -+ if (jump->condition) -+ target->instructions[target->ins_count].src[0] = *jump->condition; -+ -+ ++target->ins_count; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, -+ struct vsir_cfg_structure_list *list, unsigned int loop_idx) -+{ -+ enum vkd3d_result ret; -+ size_t i; -+ -+ for (i = 0; i < list->count; ++i) -+ { -+ struct vsir_cfg_structure *structure = &list->structures[i]; -+ -+ switch (structure->type) -+ { -+ case STRUCTURE_TYPE_BLOCK: -+ if ((ret = vsir_cfg_structure_list_emit_block(cfg, structure->u.block)) < 0) -+ return ret; -+ break; -+ -+ case STRUCTURE_TYPE_LOOP: -+ if ((ret = vsir_cfg_structure_list_emit_loop(cfg, &structure->u.loop, loop_idx)) < 0) -+ return ret; -+ break; -+ -+ case STRUCTURE_TYPE_SELECTION: -+ if ((ret = vsir_cfg_structure_list_emit_selection(cfg, &structure->u.selection, -+ loop_idx)) < 0) -+ return ret; -+ break; -+ -+ case STRUCTURE_TYPE_JUMP: -+ if ((ret = vsir_cfg_structure_list_emit_jump(cfg, &structure->u.jump, -+ loop_idx)) < 0) -+ return ret; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) -+{ -+ return vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX); -+} -+ -+static enum vkd3d_result vsir_program_structurize_function(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target, -+ size_t *pos) -+{ -+ enum vkd3d_result ret; -+ struct vsir_cfg cfg; -+ -+ if ((ret 
= vsir_cfg_init(&cfg, program, message_context, target, pos)) < 0) -+ return ret; -+ -+ vsir_cfg_compute_dominators(&cfg); -+ -+ if ((ret = vsir_cfg_compute_loops(&cfg)) < 0) -+ goto out; -+ -+ if ((ret = vsir_cfg_sort_nodes(&cfg)) < 0) -+ goto out; -+ -+ if ((ret = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) -+ goto out; -+ -+ if ((ret = vsir_cfg_build_structured_program(&cfg)) < 0) -+ goto out; -+ -+ if ((ret = vsir_cfg_optimize(&cfg)) < 0) -+ goto out; - -- for (j = 0; j < block->successors.count; ++j) -- TRACE(" n%u -> n%u;\n", block->label, block->successors.blocks[j]->label); -- } -+ ret = vsir_cfg_emit_structured_program(&cfg); - -- TRACE("}\n"); -+out: -+ vsir_cfg_cleanup(&cfg); -+ -+ return ret; - } - --static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program) -+static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) - { -- struct vsir_block *current_block = NULL; -+ struct vsir_cfg_emit_target target = {0}; - enum vkd3d_result ret; - size_t i; - -- memset(cfg, 0, sizeof(*cfg)); -- cfg->program = program; -- cfg->block_count = program->block_count; -+ target.jump_target_temp_idx = program->temp_count; -+ target.temp_count = program->temp_count + 1; - -- if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) -+ if (!reserve_instructions(&target.instructions, &target.ins_capacity, program->instructions.count)) - return VKD3D_ERROR_OUT_OF_MEMORY; - -- for (i = 0; i < program->instructions.count; ++i) -+ for (i = 0; i < program->instructions.count;) - { -- struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (instruction->handler_idx) -+ switch (ins->handler_idx) - { -- case VKD3DSIH_PHI: -- case VKD3DSIH_SWITCH_MONOLITHIC: -- vkd3d_unreachable(); -- - case VKD3DSIH_LABEL: -- { -- unsigned int label = label_from_src_param(&instruction->src[0]); -- -- assert(!current_block); -- assert(label > 0); -- assert(label <= cfg->block_count); -- current_block = &cfg->blocks[label - 1]; -- assert(current_block->label == 0); -- if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0) -+ assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -+ TRACE("Structurizing a non-hull shader.\n"); -+ if ((ret = vsir_program_structurize_function(program, message_context, -+ &target, &i)) < 0) - goto fail; -- current_block->begin = &program->instructions.elements[i + 1]; -- if (!cfg->entry) -- cfg->entry = current_block; -+ assert(i == program->instructions.count); - break; -- } - -- case VKD3DSIH_BRANCH: -- case VKD3DSIH_RET: -- assert(current_block); -- current_block->end = instruction; -- current_block = NULL; -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -+ TRACE("Structurizing phase %u of a hull shader.\n", ins->handler_idx); -+ target.instructions[target.ins_count++] = *ins; -+ ++i; -+ if ((ret = vsir_program_structurize_function(program, message_context, -+ &target, &i)) < 0) -+ goto fail; - break; - - default: -+ if (!reserve_instructions(&target.instructions, &target.ins_capacity, target.ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ target.instructions[target.ins_count++] = *ins; -+ ++i; - break; - } - } - -- for (i = 0; i < cfg->block_count; ++i) -- { -- struct vsir_block 
*block = &cfg->blocks[i]; -- -- if (block->label == 0) -- continue; -- -- switch (block->end->handler_idx) -- { -- case VKD3DSIH_RET: -- break; -- -- case VKD3DSIH_BRANCH: -- if (vsir_register_is_label(&block->end->src[0].reg)) -- { -- if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[0])) < 0) -- goto fail; -- } -- else -- { -- if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[1])) < 0) -- goto fail; -- -- if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[2])) < 0) -- goto fail; -- } -- break; -- -- default: -- vkd3d_unreachable(); -- } -- } -- -- if (TRACE_ON()) -- vsir_cfg_dump_dot(cfg); -+ vkd3d_free(program->instructions.elements); -+ program->instructions.elements = target.instructions; -+ program->instructions.capacity = target.ins_capacity; -+ program->instructions.count = target.ins_count; -+ program->temp_count = target.temp_count; - - return VKD3D_OK; - - fail: -- vsir_cfg_cleanup(cfg); -+ vkd3d_free(target.instructions); - - return ret; - } - --/* Block A dominates block B if every path from the entry point to B -- * must pass through A. Naively compute the set of blocks that are -- * dominated by `reference' by running a graph visit starting from the -- * entry point (which must be the initial value of `current') and -- * avoiding `reference'. Running this for all the blocks takes -- * quadratic time: if in the future something better is sought after, -- * the standard tool seems to be the Lengauer-Tarjan algorithm. */ --static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, struct vsir_block *reference) -+static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc, -+ struct vsir_block *block, struct vsir_block **origin_blocks) - { -- size_t i; -- -- assert(current->label != 0); -- -- if (current == reference) -- return; -+ unsigned int i; - -- if (!bitmap_is_set(reference->dominates, current->label - 1)) -+ if (!register_is_ssa(reg)) - return; - -- bitmap_clear(reference->dominates, current->label - 1); -+ i = reg->idx[0].offset; -+ if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) -+ alloc->table[i] = alloc->next_temp_idx++; - -- for (i = 0; i < current->successors.count; ++i) -- vsir_cfg_compute_dominators_recurse(current->successors.blocks[i], reference); -+ for (i = 0; i < reg->idx_count; ++i) -+ if (reg->idx[i].rel_addr) -+ register_map_undominated_use(®->idx[i].rel_addr->reg, alloc, block, origin_blocks); - } - --static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) -+/* Drivers are not necessarily optimised to handle very large numbers of temps. For example, -+ * using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV. -+ * This can also result in the backend emitting less code because temps typically need an -+ * access chain and a load/store. Conversion of phi SSA values to temps should eliminate all -+ * undominated SSA use, but structurisation may create new occurrences. 
*/ -+static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg) - { -- struct vkd3d_string_buffer buf; -- size_t i, j; -+ struct vsir_program *program = cfg->program; -+ struct ssas_to_temps_alloc alloc = {0}; -+ struct vsir_block **origin_blocks; -+ unsigned int j; -+ size_t i; - -- if (TRACE_ON()) -- vkd3d_string_buffer_init(&buf); -+ if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks)))) -+ { -+ ERR("Failed to allocate origin block array.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count)) -+ { -+ vkd3d_free(origin_blocks); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } - - for (i = 0; i < cfg->block_count; ++i) - { - struct vsir_block *block = &cfg->blocks[i]; -+ struct vkd3d_shader_instruction *ins; - - if (block->label == 0) - continue; - -- vsir_cfg_compute_dominators_recurse(cfg->entry, block); -- -- if (TRACE_ON()) -+ for (ins = block->begin; ins <= block->end; ++ins) - { -- vkd3d_string_buffer_printf(&buf, "Block %u dominates:", block->label); -- for (j = 0; j < cfg->block_count; j++) -+ for (j = 0; j < ins->dst_count; ++j) - { -- struct vsir_block *block2 = &cfg->blocks[j]; -- -- if (block2->label == 0) -- continue; -- -- if (bitmap_is_set(block->dominates, j)) -- vkd3d_string_buffer_printf(&buf, " %u", block2->label); -+ if (register_is_ssa(&ins->dst[j].reg)) -+ origin_blocks[ins->dst[j].reg.idx[0].offset] = block; - } -- TRACE("%s\n", buf.buffer); -- vkd3d_string_buffer_clear(&buf); - } - } - -- if (TRACE_ON()) -- vkd3d_string_buffer_cleanup(&buf); --} -+ for (i = 0; i < cfg->block_count; ++i) -+ { -+ struct vsir_block *block = &cfg->blocks[i]; -+ struct vkd3d_shader_instruction *ins; - --enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_compile_info *compile_info) --{ -- struct vkd3d_shader_instruction_array *instructions = &parser->program.instructions; -- enum vkd3d_result result = VKD3D_OK; -+ if (block->label == 0) -+ continue; - -- remove_dcl_temps(&parser->program); -+ for (ins = block->begin; ins <= block->end; ++ins) -+ { -+ for (j = 0; j < ins->src_count; ++j) -+ register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks); -+ } -+ } - -- if ((result = instruction_array_lower_texkills(parser)) < 0) -- return result; -+ if (alloc.next_temp_idx == program->temp_count) -+ goto done; - -- if (parser->shader_desc.is_dxil) -- { -- struct vsir_cfg cfg; -+ TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count); - -- if ((result = lower_switch_to_if_ladder(&parser->program)) < 0) -- return result; -+ for (i = cfg->function_begin; i < cfg->function_end; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- if ((result = materialize_ssas_to_temps(parser)) < 0) -- return result; -+ for (j = 0; j < ins->dst_count; ++j) -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); - -- if ((result = vsir_cfg_init(&cfg, &parser->program)) < 0) -- return result; -+ for (j = 0; j < ins->src_count; ++j) -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); -+ } - -- vsir_cfg_compute_dominators(&cfg); -+ program->temp_count = alloc.next_temp_idx; -+done: -+ vkd3d_free(origin_blocks); -+ vkd3d_free(alloc.table); - -- if ((result = simple_structurizer_run(parser)) < 0) -- { -- vsir_cfg_cleanup(&cfg); -- return result; -- } -+ return VKD3D_OK; -+} - -- 
vsir_cfg_cleanup(&cfg); -- } -- else -- { -- if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -- { -- if ((result = remap_output_signature(parser, compile_info)) < 0) -- return result; -- } -+static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_function( -+ struct vsir_program *program, struct vkd3d_shader_message_context *message_context, -+ size_t *pos) -+{ -+ enum vkd3d_result ret; -+ struct vsir_cfg cfg; - -- if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_HULL) -- { -- if ((result = instruction_array_flatten_hull_shader_phases(instructions)) < 0) -- return result; -+ if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL, pos)) < 0) -+ return ret; - -- if ((result = instruction_array_normalise_hull_shader_control_point_io(instructions, -- &parser->shader_desc.input_signature)) < 0) -- return result; -- } -+ vsir_cfg_compute_dominators(&cfg); - -- if ((result = shader_normalise_io_registers(parser)) < 0) -- return result; -+ ret = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg); - -- if ((result = instruction_array_normalise_flat_constants(&parser->program)) < 0) -- return result; -+ vsir_cfg_cleanup(&cfg); - -- remove_dead_code(&parser->program); -+ return ret; -+} - -- if ((result = normalise_combined_samplers(parser)) < 0) -- return result; -- } -+static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ enum vkd3d_result ret; -+ size_t i; - -- if ((result = flatten_control_flow_constructs(parser)) < 0) -- return result; -+ for (i = 0; i < program->instructions.count;) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- if (TRACE_ON()) -- vkd3d_shader_trace(&parser->program); -+ switch (ins->handler_idx) -+ { -+ case VKD3DSIH_LABEL: -+ assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -+ TRACE("Materializing undominated SSAs in a non-hull shader.\n"); -+ if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( -+ program, message_context, &i)) < 0) -+ return ret; -+ assert(i == program->instructions.count); -+ break; - -- if (!parser->failed && (result = vsir_validate(parser)) < 0) -- return result; -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -+ TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->handler_idx); -+ ++i; -+ if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( -+ program, message_context, &i)) < 0) -+ return ret; -+ break; - -- if (parser->failed) -- result = VKD3D_ERROR_INVALID_SHADER; -+ default: -+ ++i; -+ break; -+ } -+ } - -- return result; -+ return VKD3D_OK; - } - - struct validation_context - { -- struct vkd3d_shader_parser *parser; -+ struct vkd3d_shader_message_context *message_context; - const struct vsir_program *program; - size_t instruction_idx; -+ struct vkd3d_shader_location null_location; - bool invalid_instruction_idx; -+ enum vkd3d_result status; - bool dcl_temps_found; - enum vkd3d_shader_opcode phase; - enum cf_type -@@ -3452,16 +5199,21 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c - - if (ctx->invalid_instruction_idx) - { -- vkd3d_shader_parser_error(ctx->parser, error, "%s", buf.buffer); -+ vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); - ERR("VSIR 
validation error: %s\n", buf.buffer); - } - else - { -- vkd3d_shader_parser_error(ctx->parser, error, "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); -+ const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; -+ vkd3d_shader_error(ctx->message_context, &ins->location, error, -+ "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); - ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); - } - - vkd3d_string_buffer_cleanup(&buf); -+ -+ if (!ctx->status) -+ ctx->status = VKD3D_ERROR_INVALID_SHADER; - } - - static void vsir_validate_src_param(struct validation_context *ctx, -@@ -3515,10 +5267,10 @@ static void vsir_validate_register(struct validation_context *ctx, - if (reg->idx[0].rel_addr) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register."); - -- if (reg->idx[0].offset >= ctx->parser->program.temp_count) -+ if (reg->idx[0].offset >= ctx->program->temp_count) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.", -- reg->idx[0].offset, ctx->parser->program.temp_count); -+ reg->idx[0].offset, ctx->program->temp_count); - break; - } - -@@ -3606,7 +5358,7 @@ static void vsir_validate_register(struct validation_context *ctx, - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.", - reg->precision); - -- if (reg->data_type != VKD3D_DATA_UINT) -+ if (reg->data_type != VKD3D_DATA_UNUSED) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.", - reg->data_type); - -@@ -3708,7 +5460,7 @@ static void vsir_validate_dst_param(struct validation_context *ctx, - switch (dst->reg.type) - { - case VKD3DSPR_SSA: -- if (dst->reg.idx[0].offset < ctx->parser->program.ssa_count) -+ if (dst->reg.idx[0].offset < ctx->program->ssa_count) - { - struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; - -@@ -3761,7 +5513,7 @@ static void vsir_validate_src_param(struct validation_context *ctx, - switch (src->reg.type) - { - case VKD3DSPR_SSA: -- if (src->reg.idx[0].offset < ctx->parser->program.ssa_count) -+ if (src->reg.idx[0].offset < ctx->program->ssa_count) - { - struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; - unsigned int i; -@@ -3852,7 +5604,6 @@ static void vsir_validate_instruction(struct validation_context *ctx) - size_t i; - - instruction = &ctx->program->instructions.elements[ctx->instruction_idx]; -- ctx->parser->location = instruction->location; - - for (i = 0; i < instruction->dst_count; ++i) - vsir_validate_dst_param(ctx, &instruction->dst[i]); -@@ -3884,11 +5635,74 @@ static void vsir_validate_instruction(struct validation_context *ctx) - ctx->dcl_temps_found = false; - return; - -+ case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: -+ /* Exclude non-finite values. 
*/ -+ if (!(instruction->declaration.max_tessellation_factor >= 1.0f -+ && instruction->declaration.max_tessellation_factor <= 64.0f)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Max tessellation factor %f is invalid.", -+ instruction->declaration.max_tessellation_factor); -+ return; -+ -+ case VKD3DSIH_DCL_INPUT_PRIMITIVE: -+ if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED -+ || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", -+ instruction->declaration.primitive_type.type); -+ return; -+ -+ case VKD3DSIH_DCL_VERTICES_OUT: -+ if (instruction->declaration.count > 1024) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", -+ instruction->declaration.count); -+ return; -+ -+ case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: -+ if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED -+ || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", -+ instruction->declaration.primitive_type.type); -+ return; -+ -+ case VKD3DSIH_DCL_GS_INSTANCES: -+ if (!instruction->declaration.count || instruction->declaration.count > 32) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", -+ instruction->declaration.count); -+ return; -+ -+ case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: -+ if (!instruction->declaration.count || instruction->declaration.count > 32) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.", -+ instruction->declaration.count); -+ return; -+ -+ case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: -+ if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID -+ || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); -+ return; -+ -+ case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: -+ if (!instruction->declaration.tessellator_output_primitive -+ || instruction->declaration.tessellator_output_primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Tessellator output primitive %#x is invalid.", instruction->declaration.tessellator_output_primitive); -+ return; -+ -+ case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: -+ if (!instruction->declaration.tessellator_partitioning -+ || instruction->declaration.tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Tessellator partitioning %#x is invalid.", instruction->declaration.tessellator_partitioning); -+ return; -+ - default: - break; - } - -- if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID) -+ /* Only DCL instructions may occur outside hull shader phases. 
*/ -+ if (!vsir_instruction_is_dcl(instruction) && version->type == VKD3D_SHADER_TYPE_HULL -+ && ctx->phase == VKD3DSIH_INVALID) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, - "Instruction %#x appear before any phase instruction in a hull shader.", - instruction->handler_idx); -@@ -4180,7 +5994,8 @@ static void vsir_validate_instruction(struct validation_context *ctx) - unsigned int value_idx = 2 * i; - unsigned int label_idx = 2 * i + 1; - -- if (!register_is_constant(&instruction->src[value_idx].reg) && !register_is_ssa(&instruction->src[value_idx].reg)) -+ if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) -+ && !register_is_ssa(&instruction->src[value_idx].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid value register for incoming %zu of type %#x in PHI instruction, " - "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); -@@ -4203,17 +6018,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) - } - } - --enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) -+enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, -+ const char *source_name, struct vkd3d_shader_message_context *message_context) - { - struct validation_context ctx = - { -- .parser = parser, -- .program = &parser->program, -+ .message_context = message_context, -+ .program = program, -+ .null_location = {.source_name = source_name}, -+ .status = VKD3D_OK, - .phase = VKD3DSIH_INVALID, - }; - unsigned int i; - -- if (!(parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) -+ if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) - return VKD3D_OK; - - if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) -@@ -4222,7 +6040,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) - if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) - goto fail; - -- for (ctx.instruction_idx = 0; ctx.instruction_idx < parser->program.instructions.count; ++ctx.instruction_idx) -+ for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx) - vsir_validate_instruction(&ctx); - - ctx.invalid_instruction_idx = true; -@@ -4247,7 +6065,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) - vkd3d_free(ctx.temps); - vkd3d_free(ctx.ssas); - -- return VKD3D_OK; -+ return ctx.status; - - fail: - vkd3d_free(ctx.blocks); -@@ -4256,3 +6074,72 @@ fail: - - return VKD3D_ERROR_OUT_OF_MEMORY; - } -+ -+enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) -+{ -+ enum vkd3d_result result = VKD3D_OK; -+ -+ if ((result = vsir_program_lower_instructions(program)) < 0) -+ return result; -+ -+ if (program->shader_version.major >= 6) -+ { -+ if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) -+ return result; -+ -+ if ((result = lower_switch_to_if_ladder(program)) < 0) -+ return result; -+ -+ if ((result = vsir_program_structurize(program, message_context)) < 0) -+ return result; -+ -+ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) -+ return result; -+ -+ if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0) -+ return result; -+ } -+ else -+ { -+ if (program->shader_version.type != 
VKD3D_SHADER_TYPE_PIXEL) -+ { -+ if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) -+ return result; -+ } -+ -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) -+ { -+ if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) -+ return result; -+ -+ if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, -+ &program->input_signature)) < 0) -+ return result; -+ } -+ -+ if ((result = vsir_program_normalise_io_registers(program)) < 0) -+ return result; -+ -+ if ((result = instruction_array_normalise_flat_constants(program)) < 0) -+ return result; -+ -+ remove_dead_code(program); -+ -+ if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) -+ return result; -+ -+ if (compile_info->target_type != VKD3D_SHADER_TARGET_GLSL -+ && (result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) -+ return result; -+ } -+ -+ if (TRACE_ON()) -+ vkd3d_shader_trace(program); -+ -+ if ((result = vsir_program_validate(program, config_flags, -+ compile_info->source_name, message_context)) < 0) -+ return result; -+ -+ return result; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 5c87ff15503..4ee8e6bba4c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -45,6 +45,8 @@ static spv_target_env spv_target_env_from_vkd3d(enum vkd3d_shader_spirv_environm - return SPV_ENV_OPENGL_4_5; - case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: - return SPV_ENV_VULKAN_1_0; -+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: -+ return SPV_ENV_VULKAN_1_1; - default: - ERR("Invalid environment %#x.\n", environment); - return SPV_ENV_VULKAN_1_0; -@@ -223,7 +225,8 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d - } - } - --#define VKD3D_SPIRV_VERSION 0x00010000 -+#define VKD3D_SPIRV_VERSION_1_0 0x00010000 -+#define VKD3D_SPIRV_VERSION_1_3 0x00010300 - #define VKD3D_SPIRV_GENERATOR_ID 18 - #define VKD3D_SPIRV_GENERATOR_VERSION 11 - #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) -@@ -358,6 +361,7 @@ struct vkd3d_spirv_builder - uint32_t type_sampler_id; - uint32_t type_bool_id; - uint32_t type_void_id; -+ uint32_t scope_subgroup_id; - - struct vkd3d_spirv_stream debug_stream; /* debug instructions */ - struct vkd3d_spirv_stream annotation_stream; /* decoration instructions */ -@@ -1524,6 +1528,19 @@ static uint32_t vkd3d_spirv_build_op_logical_equal(struct vkd3d_spirv_builder *b - SpvOpLogicalEqual, result_type, operand0, operand1); - } - -+static uint32_t vkd3d_spirv_build_op_logical_or(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t operand0, uint32_t operand1) -+{ -+ return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, -+ SpvOpLogicalOr, result_type, operand0, operand1); -+} -+ -+static uint32_t vkd3d_spirv_build_op_logical_not(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t operand) -+{ -+ return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpLogicalNot, result_type, operand); -+} -+ - static uint32_t vkd3d_spirv_build_op_convert_utof(struct vkd3d_spirv_builder *builder, - uint32_t result_type, uint32_t unsigned_value) - { -@@ -1725,6 +1742,63 @@ static void vkd3d_spirv_build_op_memory_barrier(struct vkd3d_spirv_builder *buil - SpvOpMemoryBarrier, memory_id, 
memory_semantics_id); - } - -+static uint32_t vkd3d_spirv_build_op_scope_subgroup(struct vkd3d_spirv_builder *builder) -+{ -+ return vkd3d_spirv_get_op_constant(builder, vkd3d_spirv_get_op_type_int(builder, 32, 0), SpvScopeSubgroup); -+} -+ -+static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *builder) -+{ -+ return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); -+ return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpGroupNonUniformBallot, -+ result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot_bit_count(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, SpvGroupOperation group_op, uint32_t val_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformBallotBitCount, -+ result_type, vkd3d_spirv_get_op_scope_subgroup(builder), group_op, val_id); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_elect(struct vkd3d_spirv_builder *builder) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniform); -+ return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpGroupNonUniformElect, -+ vkd3d_spirv_get_op_type_bool(builder), vkd3d_spirv_get_op_scope_subgroup(builder)); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_broadcast(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id, uint32_t lane_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformBroadcast, result_type, -+ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, lane_id); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_shuffle(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id, uint32_t lane_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformShuffle); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformShuffle, result_type, -+ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, lane_id); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_broadcast_first(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); -+ return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpGroupNonUniformBroadcastFirst, -+ result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); -+} -+ - static uint32_t vkd3d_spirv_build_op_glsl_std450_tr1(struct vkd3d_spirv_builder *builder, - enum GLSLstd450 op, uint32_t result_type, uint32_t operand) - { -@@ -1825,6 +1899,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder - { - switch (data_type) - { -+ case VKD3D_DATA_HALF: /* Minimum precision. 
TODO: native 16-bit */ - case VKD3D_DATA_FLOAT: - case VKD3D_DATA_SNORM: - case VKD3D_DATA_UNORM: -@@ -1832,6 +1907,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder - break; - case VKD3D_DATA_INT: - case VKD3D_DATA_UINT: -+ case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ - return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); - break; - case VKD3D_DATA_DOUBLE: -@@ -1900,7 +1976,7 @@ static void vkd3d_spirv_builder_free(struct vkd3d_spirv_builder *builder) - } - - static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, -- struct vkd3d_shader_code *spirv, const char *entry_point) -+ struct vkd3d_shader_code *spirv, const char *entry_point, enum vkd3d_shader_spirv_environment environment) - { - uint64_t capability_mask = builder->capability_mask; - struct vkd3d_spirv_stream stream; -@@ -1911,7 +1987,8 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, - vkd3d_spirv_stream_init(&stream); - - vkd3d_spirv_build_word(&stream, SpvMagicNumber); -- vkd3d_spirv_build_word(&stream, VKD3D_SPIRV_VERSION); -+ vkd3d_spirv_build_word(&stream, (environment == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1) -+ ? VKD3D_SPIRV_VERSION_1_3 : VKD3D_SPIRV_VERSION_1_0); - vkd3d_spirv_build_word(&stream, VKD3D_SPIRV_GENERATOR_MAGIC); - vkd3d_spirv_build_word(&stream, builder->current_id); /* bound */ - vkd3d_spirv_build_word(&stream, 0); /* schema, reserved */ -@@ -1940,6 +2017,9 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, - || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStorageImageArrayDynamicIndexing) - || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderNonUniformEXT)) - vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_descriptor_indexing"); -+ if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityFragmentShaderPixelInterlockEXT) -+ || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityFragmentShaderSampleInterlockEXT)) -+ vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_fragment_shader_interlock"); - if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStencilExportEXT)) - vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_shader_stencil_export"); - if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderViewportIndexLayerEXT)) -@@ -2346,6 +2426,7 @@ struct spirv_compiler - unsigned int output_control_point_count; - - bool use_vocp; -+ bool use_invocation_interlock; - bool emit_point_size; - - enum vkd3d_shader_opcode phase; -@@ -2427,14 +2508,13 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) - vkd3d_free(compiler); - } - --static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, -- struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, -+static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *program, -+ const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, -- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location, -- uint64_t config_flags) -+ struct vkd3d_shader_message_context *message_context, uint64_t config_flags) - { -- const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; -- const struct shader_signature *output_signature = &shader_desc->output_signature; -+ const struct shader_signature 
*patch_constant_signature = &program->patch_constant_signature; -+ const struct shader_signature *output_signature = &program->output_signature; - const struct vkd3d_shader_interface_info *shader_interface; - const struct vkd3d_shader_descriptor_offset_info *offset_info; - const struct vkd3d_shader_spirv_target_info *target_info; -@@ -2447,7 +2527,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve - - memset(compiler, 0, sizeof(*compiler)); - compiler->message_context = message_context; -- compiler->location = *location; -+ compiler->location.source_name = compile_info->source_name; - compiler->config_flags = config_flags; - - if ((target_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO))) -@@ -2456,6 +2536,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve - { - case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: - case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: -+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: - break; - default: - WARN("Invalid target environment %#x.\n", target_info->environment); -@@ -2545,7 +2626,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve - - rb_init(&compiler->symbol_table, vkd3d_symbol_compare); - -- compiler->shader_type = shader_version->type; -+ compiler->shader_type = program->shader_version.type; - - if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) - { -@@ -2608,6 +2689,11 @@ static bool spirv_compiler_is_opengl_target(const struct spirv_compiler *compile - return spirv_compiler_get_target_environment(compiler) == VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5; - } - -+static bool spirv_compiler_is_spirv_min_1_3_target(const struct spirv_compiler *compiler) -+{ -+ return spirv_compiler_get_target_environment(compiler) == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1; -+} -+ - static bool spirv_compiler_is_target_extension_supported(const struct spirv_compiler *compiler, - enum vkd3d_shader_spirv_extension extension) - { -@@ -3126,6 +3212,12 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s - case VKD3DSPR_OUTSTENCILREF: - snprintf(buffer, buffer_size, "oStencilRef"); - break; -+ case VKD3DSPR_WAVELANECOUNT: -+ snprintf(buffer, buffer_size, "vWaveLaneCount"); -+ break; -+ case VKD3DSPR_WAVELANEINDEX: -+ snprintf(buffer, buffer_size, "vWaveLaneIndex"); -+ break; - default: - FIXME("Unhandled register %#x.\n", reg->type); - snprintf(buffer, buffer_size, "unrecognized_%#x", reg->type); -@@ -3372,7 +3464,7 @@ struct vkd3d_shader_register_info - bool is_aggregate; - }; - --static bool spirv_compiler_get_register_info(const struct spirv_compiler *compiler, -+static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_info *register_info) - { - struct vkd3d_symbol reg_symbol, *symbol; -@@ -3398,7 +3490,8 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil - vkd3d_symbol_make_register(®_symbol, reg); - if (!(entry = rb_get(&compiler->symbol_table, ®_symbol))) - { -- FIXME("Unrecognized register (%s).\n", debug_vkd3d_symbol(®_symbol)); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE, -+ "Unrecognized register (%s).\n", debug_vkd3d_symbol(®_symbol)); - memset(register_info, 0, sizeof(*register_info)); - return false; - } -@@ -3548,8 +3641,9 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp - 
indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, ®->idx[0]); - } - -+ /* Alignment is supported only in the Kernel execution model and is an optimisation only. */ - if (reg->alignment) -- WARN("Ignoring alignment %u.\n", reg->alignment); -+ TRACE("Ignoring alignment %u.\n", reg->alignment); - - if (index_count) - { -@@ -3736,6 +3830,70 @@ static uint32_t spirv_compiler_emit_bool_to_double(struct spirv_compiler *compil - return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); - } - -+/* Based on the implementation in the OpenGL Mathematics library. */ -+static uint32_t half_to_float(uint16_t value) -+{ -+ uint32_t s = (value & 0x8000u) << 16; -+ uint32_t e = (value >> 10) & 0x1fu; -+ uint32_t m = value & 0x3ffu; -+ -+ if (!e) -+ { -+ if (!m) -+ { -+ /* Plus or minus zero */ -+ return s; -+ } -+ else -+ { -+ /* Denormalized number -- renormalize it */ -+ -+ while (!(m & 0x400u)) -+ { -+ m <<= 1; -+ --e; -+ } -+ -+ ++e; -+ m &= ~0x400u; -+ } -+ } -+ else if (e == 31u) -+ { -+ /* Positive or negative infinity for zero 'm'. -+ * Nan for non-zero 'm' -- preserve sign and significand bits */ -+ return s | 0x7f800000u | (m << 13); -+ } -+ -+ /* Normalized number */ -+ e += 127u - 15u; -+ m <<= 13; -+ -+ /* Assemble s, e and m. */ -+ return s | (e << 23) | m; -+} -+ -+static uint32_t convert_raw_constant32(enum vkd3d_data_type data_type, unsigned int uint_value) -+{ -+ int16_t i; -+ -+ /* TODO: native 16-bit support. */ -+ if (data_type != VKD3D_DATA_UINT16 && data_type != VKD3D_DATA_HALF) -+ return uint_value; -+ -+ if (data_type == VKD3D_DATA_HALF) -+ return half_to_float(uint_value); -+ -+ /* Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or -+ * should not be sign-extended when 16-bit execution is not supported. The AMD RX 580 Windows -+ * driver has no 16-bit support, and sign-extends all 16-bit constant ints to 32 bits. These -+ * results differ from SM 5. The RX 6750 XT supports 16-bit execution, so constants are not -+ * extended, and results match SM 5. It seems best to replicate the sign-extension, and if -+ * execution is 16-bit, the values will be truncated. */ -+ i = uint_value; -+ return (int32_t)i; -+} -+ - static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) - { -@@ -3748,14 +3906,15 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile - if (reg->dimension == VSIR_DIMENSION_SCALAR) - { - for (i = 0; i < component_count; ++i) -- values[i] = *reg->u.immconst_u32; -+ values[i] = convert_raw_constant32(reg->data_type, reg->u.immconst_u32[0]); - } - else - { - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) -- values[j++] = reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]; -+ values[j++] = convert_raw_constant32(reg->data_type, -+ reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]); - } - } - -@@ -3899,6 +4058,13 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil - - switch (icb->data_type) - { -+ case VKD3D_DATA_HALF: -+ case VKD3D_DATA_UINT16: -+ /* Scalar only. 
*/ -+ for (i = 0; i < element_count; ++i) -+ elements[i] = vkd3d_spirv_get_op_constant(builder, elem_type_id, -+ convert_raw_constant32(icb->data_type, icb->data[i])); -+ break; - case VKD3D_DATA_FLOAT: - case VKD3D_DATA_INT: - case VKD3D_DATA_UINT: -@@ -3998,7 +4164,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - struct vkd3d_shader_register_info reg_info; - unsigned int component_count; - uint32_t type_id, val_id; -- uint32_t write_mask32; -+ uint32_t val_write_mask; - - if (reg->type == VKD3DSPR_IMMCONST) - return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); -@@ -4018,17 +4184,17 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - return vkd3d_spirv_get_op_undef(builder, type_id); - } -- assert(reg_info.component_type != VKD3D_SHADER_COMPONENT_DOUBLE); - spirv_compiler_emit_dereference_register(compiler, reg, ®_info); - -- write_mask32 = data_type_is_64_bit(reg->data_type) ? vsir_write_mask_32_from_64(write_mask) : write_mask; -+ val_write_mask = (data_type_is_64_bit(reg->data_type) && !component_type_is_64_bit(reg_info.component_type)) -+ ? vsir_write_mask_32_from_64(write_mask) : write_mask; - - /* Intermediate value (no storage class). */ - if (reg_info.storage_class == SpvStorageClassMax) - { - val_id = reg_info.id; - } -- else if (vsir_write_mask_component_count(write_mask32) == 1) -+ else if (vsir_write_mask_component_count(val_write_mask) == 1) - { - return spirv_compiler_emit_load_scalar(compiler, reg, swizzle, write_mask, ®_info); - } -@@ -4041,7 +4207,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - - swizzle = data_type_is_64_bit(reg->data_type) ? 
vsir_swizzle_32_from_64(swizzle) : swizzle; - val_id = spirv_compiler_emit_swizzle(compiler, -- val_id, reg_info.write_mask, reg_info.component_type, swizzle, write_mask32); -+ val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask); - - if (component_type != reg_info.component_type) - { -@@ -4087,7 +4253,7 @@ static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, - uint32_t type_id; - - type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); -- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) -+ if (data_type_is_floating_point(reg->data_type)) - return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id); - - FIXME("Unhandled data type %#x.\n", reg->data_type); -@@ -4101,7 +4267,7 @@ static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, - uint32_t type_id; - - type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); -- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) -+ if (data_type_is_floating_point(reg->data_type)) - return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); - else if (data_type_is_integer(reg->data_type)) - return vkd3d_spirv_build_op_snegate(builder, type_id, val_id); -@@ -4285,7 +4451,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, - } - - type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); -- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) -+ if (data_type_is_floating_point(reg->data_type)) - return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id); - - FIXME("Unhandled data type %#x.\n", reg->data_type); -@@ -4322,11 +4488,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp - { - unsigned int component_count = vsir_write_mask_component_count(dst->write_mask); - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- uint32_t type_id, val_id; -+ uint32_t type_id, dst_type_id, val_id; - -+ type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - if (component_count > 1) - { -- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - val_id = vkd3d_spirv_build_op_composite_construct(builder, - type_id, component_ids, component_count); - } -@@ -4334,6 +4500,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp - { - val_id = *component_ids; - } -+ -+ dst_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); -+ if (dst_type_id != type_id) -+ val_id = vkd3d_spirv_build_op_bitcast(builder, dst_type_id, val_id); -+ - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } - -@@ -4433,6 +4604,10 @@ static void spirv_compiler_decorate_builtin(struct spirv_compiler *compiler, - case SpvBuiltInCullDistance: - vkd3d_spirv_enable_capability(builder, SpvCapabilityCullDistance); - break; -+ case SpvBuiltInSubgroupSize: -+ case SpvBuiltInSubgroupLocalInvocationId: -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniform); -+ break; - default: - break; - } -@@ -4622,6 +4797,9 @@ vkd3d_register_builtins[] = - {VKD3DSPR_DEPTHOUTLE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, - - {VKD3DSPR_OUTSTENCILREF, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, -+ -+ {VKD3DSPR_WAVELANECOUNT, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, -+ {VKD3DSPR_WAVELANEINDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, 
SpvBuiltInSubgroupLocalInvocationId}}, - }; - - static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *compiler, -@@ -5670,9 +5848,26 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler - flags &= ~VKD3DSGF_ENABLE_INT64; - } - -+ if (flags & VKD3DSGF_ENABLE_WAVE_INTRINSICS) -+ { -+ if (!(compiler->features & VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS)) -+ { -+ WARN("Unsupported wave ops.\n"); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, -+ "The target environment does not support wave ops."); -+ } -+ else if (!spirv_compiler_is_spirv_min_1_3_target(compiler)) -+ { -+ WARN("Wave ops enabled but environment does not support SPIR-V 1.3 or greater.\n"); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, -+ "The target environment uses wave ops but does not support SPIR-V 1.3 or greater."); -+ } -+ flags &= ~VKD3DSGF_ENABLE_WAVE_INTRINSICS; -+ } -+ - if (flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS)) - FIXME("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); -- else -+ else if (flags) - WARN("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); - } - -@@ -5734,8 +5929,9 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil - vsir_register_init(®, VKD3DSPR_IDXTEMP, VKD3D_DATA_FLOAT, 1); - reg.idx[0].offset = temp->register_idx; - -+ /* Alignment is supported only in the Kernel execution model and is an optimisation only. */ - if (temp->alignment) -- WARN("Ignoring alignment %u.\n", temp->alignment); -+ TRACE("Ignoring alignment %u.\n", temp->alignment); - - function_location = spirv_compiler_get_current_function_location(compiler); - vkd3d_spirv_begin_function_stream_insertion(builder, function_location); -@@ -6272,9 +6468,24 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) - vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); - -- if (d->uav_flags & VKD3DSUF_GLOBALLY_COHERENT) -+ /* ROVs are implicitly globally coherent. */ -+ if (d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW)) - vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationCoherent, NULL, 0); - -+ if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) -+ { -+ if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, -+ "Rasteriser-ordered views are only supported in fragment shaders."); -+ else if (!spirv_compiler_is_target_extension_supported(compiler, -+ VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK)) -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, -+ "Cannot enable fragment shader interlock. 
" -+ "The target environment does not support fragment shader interlock."); -+ else -+ compiler->use_invocation_interlock = true; -+ } -+ - if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) - { - assert(structure_stride); /* counters are valid only for structured buffers */ -@@ -6324,20 +6535,26 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - } - - static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, -- const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) -+ const struct vkd3d_shader_register *reg, unsigned int alignment, unsigned int size, -+ unsigned int structure_stride, bool zero_init) - { -- uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id; -+ uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const SpvStorageClass storage_class = SpvStorageClassWorkgroup; - struct vkd3d_symbol reg_symbol; - -+ /* Alignment is supported only in the Kernel execution model. */ -+ if (alignment) -+ TRACE("Ignoring alignment %u.\n", alignment); -+ - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - length_id = spirv_compiler_get_constant_uint(compiler, size); - array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); - - pointer_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, array_type_id); -+ init_id = zero_init ? vkd3d_spirv_get_op_constant_null(builder, array_type_id) : 0; - var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, -- pointer_type_id, storage_class, 0); -+ pointer_type_id, storage_class, init_id); - - spirv_compiler_emit_register_debug_name(builder, var_id, reg); - -@@ -6352,8 +6569,8 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { - const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; -- spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, -- tgsm_raw->byte_count / 4, 0); -+ spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, tgsm_raw->alignment, -+ tgsm_raw->byte_count / 4, 0, tgsm_raw->zero_init); - } - - static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compiler, -@@ -6361,8 +6578,8 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi - { - const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; - unsigned int stride = tgsm_structured->byte_stride / 4; -- spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, -- tgsm_structured->structure_count * stride, stride); -+ spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, tgsm_structured->alignment, -+ tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); - } - - static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, -@@ -6871,7 +7088,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, - assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); - - val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -- if (dst->reg.data_type == VKD3D_DATA_FLOAT) -+ if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) - { - val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, 
instruction->handler_idx == VKD3DSIH_ITOF); - } -@@ -6880,7 +7097,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, - /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ - val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); - } -- else if (dst->reg.data_type == VKD3D_DATA_UINT) -+ else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) - { - val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); - } -@@ -6909,6 +7126,15 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - SpvOp op = SpvOpMax; - unsigned int i; - -+ if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) -+ { -+ /* At least some drivers support this anyway, but if validation is enabled it will fail. */ -+ FIXME("Unsupported 64-bit source for bit count.\n"); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -+ "64-bit source for bit count is not supported."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ - if (src->reg.data_type == VKD3D_DATA_BOOL) - { - if (dst->reg.data_type == VKD3D_DATA_BOOL) -@@ -6997,6 +7223,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( - } - glsl_insts[] = - { -+ {VKD3DSIH_ABS, GLSLstd450FAbs}, - {VKD3DSIH_ACOS, GLSLstd450Acos}, - {VKD3DSIH_ASIN, GLSLstd450Asin}, - {VKD3DSIH_ATAN, GLSLstd450Atan}, -@@ -7049,6 +7276,16 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp - unsigned int i, component_count; - enum GLSLstd450 glsl_inst; - -+ if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI -+ || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) -+ { -+ /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ -+ FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -+ "64-bit source for handler %#x is not supported.", instruction->handler_idx); -+ return; -+ } -+ - glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); - if (glsl_inst == GLSLstd450Bad) - { -@@ -7093,8 +7330,8 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, - struct vkd3d_shader_register_info dst_reg_info, src_reg_info; - const struct vkd3d_shader_dst_param *dst = instruction->dst; - const struct vkd3d_shader_src_param *src = instruction->src; -+ unsigned int i, component_count, write_mask; - uint32_t components[VKD3D_VEC4_SIZE]; -- unsigned int i, component_count; - - if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA - || dst->modifiers || src->modifiers) -@@ -7145,7 +7382,13 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, - } - - general_implementation: -- val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -+ write_mask = dst->write_mask; -+ if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) -+ write_mask = vsir_write_mask_64_from_32(write_mask); -+ else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) -+ write_mask = vsir_write_mask_32_from_64(write_mask); -+ -+ val_id = spirv_compiler_emit_load_src(compiler, src, write_mask); - if (dst->reg.data_type != src->reg.data_type) - { - val_id = vkd3d_spirv_build_op_bitcast(builder, vkd3d_spirv_get_type_id_for_data_type(builder, -@@ -7171,8 +7414,15 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, - type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); - - if (src[0].reg.data_type != VKD3D_DATA_BOOL) -- condition_id = spirv_compiler_emit_int_to_bool(compiler, -- VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); -+ { -+ if (instruction->handler_idx == VKD3DSIH_CMP) -+ condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, -+ vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, -+ spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); -+ else -+ condition_id = spirv_compiler_emit_int_to_bool(compiler, -+ VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); -+ } - val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, src1_id, src2_id); - - spirv_compiler_emit_store_dst(compiler, dst, val_id); -@@ -7335,7 +7585,7 @@ static void spirv_compiler_emit_imad(struct spirv_compiler *compiler, - unsigned int i, component_count; - - component_count = vsir_write_mask_component_count(dst->write_mask); -- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, component_count); -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); - - for (i = 0; i < ARRAY_SIZE(src_ids); ++i) - src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], dst->write_mask); -@@ -7684,6 +7934,56 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co - spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); - } - -+static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct 
vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, src0_id, src1_id, val_id; -+ -+ type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); -+ src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); -+ src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); -+ /* OpOrdered and OpUnordered are only available in Kernel mode. */ -+ src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); -+ src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); -+ val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); -+ if (instruction->handler_idx == VKD3DSIH_ORD) -+ val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t src0_id, src1_id, type_id, result_id; -+ unsigned int component_count; -+ SpvOp op; -+ -+ switch (instruction->handler_idx) -+ { -+ case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; -+ case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ component_count = vsir_write_mask_component_count(dst->write_mask); -+ -+ src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); -+ src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); -+ -+ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); -+ result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, src0_id, src1_id); -+ -+ result_id = spirv_compiler_emit_bool_to_float(compiler, component_count, result_id, false); -+ spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); -+} -+ - static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction, uint32_t target_block_id) - { -@@ -7702,11 +8002,31 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co - return merge_block_id; - } - -+static void spirv_compiler_end_invocation_interlock(struct spirv_compiler *compiler) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ -+ if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilitySampleRateShading)) -+ { -+ spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeSampleInterlockOrderedEXT, NULL, 0); -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityFragmentShaderSampleInterlockEXT); -+ } -+ else -+ { -+ spirv_compiler_emit_execution_mode(compiler, SpvExecutionModePixelInterlockOrderedEXT, NULL, 0); -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityFragmentShaderPixelInterlockEXT); -+ } -+ vkd3d_spirv_build_op(&builder->function_stream, SpvOpEndInvocationInterlockEXT); -+} -+ - static void spirv_compiler_emit_return(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - -+ if (compiler->use_invocation_interlock) -+ spirv_compiler_end_invocation_interlock(compiler); 
-+ - if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (is_in_default_phase(compiler) - || is_in_control_point_phase(compiler))) - spirv_compiler_emit_shader_epilogue_invocation(compiler); -@@ -7790,8 +8110,9 @@ static void spirv_compiler_emit_discard(struct spirv_compiler *compiler, - * a mismatch between the VSIR structure and the SPIR-V one, which would cause problems if - * structurisation is necessary. Therefore we emit it as a function call. */ - condition_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); -- condition_id = spirv_compiler_emit_int_to_bool(compiler, -- instruction->flags, src->reg.data_type, 1, condition_id); -+ if (src->reg.data_type != VKD3D_DATA_BOOL) -+ condition_id = spirv_compiler_emit_int_to_bool(compiler, -+ instruction->flags, src->reg.data_type, 1, condition_id); - void_id = vkd3d_spirv_get_op_type_void(builder); - vkd3d_spirv_build_op_function_call(builder, void_id, spirv_compiler_get_discard_function_id(compiler), - &condition_id, 1); -@@ -8570,7 +8891,6 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, - ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); - constituents[j++] = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); - } -- assert(dst->reg.data_type == VKD3D_DATA_UINT); - spirv_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); - } - -@@ -8678,8 +8998,8 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst = instruction->dst; - const struct vkd3d_shader_src_param *src = instruction->src; - uint32_t base_coordinate_id, component_idx; -- const struct vkd3d_shader_src_param *data; - struct vkd3d_shader_register_info reg_info; -+ struct vkd3d_shader_src_param data; - unsigned int component_count; - - if (!spirv_compiler_get_register_info(compiler, &dst->reg, ®_info)) -@@ -8691,9 +9011,9 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, - base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - -- data = &src[instruction->src_count - 1]; -- assert(data->reg.data_type == VKD3D_DATA_UINT); -- val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); -+ data = src[instruction->src_count - 1]; -+ data.reg.data_type = VKD3D_DATA_UINT; -+ val_id = spirv_compiler_emit_load_src(compiler, &data, dst->write_mask); - - component_count = vsir_write_mask_component_count(dst->write_mask); - for (component_idx = 0; component_idx < component_count; ++component_idx) -@@ -8944,6 +9264,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - const struct vkd3d_shader_dst_param *resource; - uint32_t coordinate_id, sample_id, pointer_id; - struct vkd3d_shader_register_info reg_info; -+ SpvMemorySemanticsMask memory_semantic; - struct vkd3d_shader_image image; - unsigned int structure_stride; - uint32_t coordinate_mask; -@@ -9035,12 +9356,23 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - - val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type); - -+ if (instruction->flags & VKD3DARF_VOLATILE) -+ { -+ WARN("Ignoring 'volatile' attribute.\n"); -+ spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG, -+ "Ignoring the 'volatile' attribute flag for 
atomic instruction %#x.", instruction->handler_idx); -+ } -+ -+ memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) -+ ? SpvMemorySemanticsSequentiallyConsistentMask -+ : SpvMemorySemanticsMaskNone; -+ - operands[i++] = pointer_id; - operands[i++] = spirv_compiler_get_constant_uint(compiler, scope); -- operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); -+ operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); - if (instruction->src_count >= 3) - { -- operands[i++] = spirv_compiler_get_constant_uint(compiler, SpvMemorySemanticsMaskNone); -+ operands[i++] = spirv_compiler_get_constant_uint(compiler, memory_semantic); - operands[i++] = spirv_compiler_emit_load_src_with_type(compiler, &src[2], VKD3DSP_WRITEMASK_0, component_type); - } - operands[i++] = val_id; -@@ -9110,6 +9442,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst = instruction->dst; - const struct vkd3d_shader_src_param *src = instruction->src; - uint32_t type_id, lod_id, val_id, miplevel_count_id; -+ enum vkd3d_shader_component_type component_type; - uint32_t constituents[VKD3D_VEC4_SIZE]; - unsigned int i, size_component_count; - struct vkd3d_shader_image image; -@@ -9146,10 +9479,16 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, - val_id = vkd3d_spirv_build_op_composite_construct(builder, - type_id, constituents, i + 2); - -+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); - if (instruction->flags == VKD3DSI_RESINFO_UINT) - { -- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); -+ /* SSA registers must match the specified result type. 
*/ -+ if (!register_is_ssa(&dst->reg)) -+ val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); -+ else -+ component_type = VKD3D_SHADER_COMPONENT_UINT; - } - else - { -@@ -9158,7 +9497,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, - val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); - } - val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, -- VKD3D_SHADER_COMPONENT_FLOAT, src[1].swizzle, dst->write_mask); -+ component_type, src[1].swizzle, dst->write_mask); - - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } -@@ -9468,6 +9807,192 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_end_primitive(builder); - } - -+static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) -+{ -+ switch (handler_idx) -+ { -+ case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: -+ return SpvOpGroupNonUniformAllEqual; -+ case VKD3DSIH_WAVE_ALL_TRUE: -+ return SpvOpGroupNonUniformAll; -+ case VKD3DSIH_WAVE_ANY_TRUE: -+ return SpvOpGroupNonUniformAny; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, val_id; -+ SpvOp op; -+ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); -+ -+ op = map_wave_bool_op(instruction->handler_idx); -+ type_id = vkd3d_spirv_get_op_type_bool(builder); -+ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -+ val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, -+ type_id, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static uint32_t spirv_compiler_emit_group_nonuniform_ballot(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_src_param *src) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ uint32_t type_id, val_id; -+ -+ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, VKD3D_VEC4_SIZE); -+ val_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_ballot(builder, type_id, val_id); -+ -+ return val_id; -+} -+ -+static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ uint32_t val_id; -+ -+ val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode handler_idx, bool is_float) -+{ -+ switch (handler_idx) -+ { -+ case VKD3DSIH_WAVE_ACTIVE_BIT_AND: -+ return SpvOpGroupNonUniformBitwiseAnd; -+ case VKD3DSIH_WAVE_ACTIVE_BIT_OR: -+ return SpvOpGroupNonUniformBitwiseOr; -+ case VKD3DSIH_WAVE_ACTIVE_BIT_XOR: -+ return SpvOpGroupNonUniformBitwiseXor; -+ case VKD3DSIH_WAVE_OP_ADD: -+ return is_float ? 
SpvOpGroupNonUniformFAdd : SpvOpGroupNonUniformIAdd; -+ case VKD3DSIH_WAVE_OP_IMAX: -+ return SpvOpGroupNonUniformSMax; -+ case VKD3DSIH_WAVE_OP_IMIN: -+ return SpvOpGroupNonUniformSMin; -+ case VKD3DSIH_WAVE_OP_MAX: -+ return SpvOpGroupNonUniformFMax; -+ case VKD3DSIH_WAVE_OP_MIN: -+ return SpvOpGroupNonUniformFMin; -+ case VKD3DSIH_WAVE_OP_MUL: -+ return is_float ? SpvOpGroupNonUniformFMul : SpvOpGroupNonUniformIMul; -+ case VKD3DSIH_WAVE_OP_UMAX: -+ return SpvOpGroupNonUniformUMax; -+ case VKD3DSIH_WAVE_OP_UMIN: -+ return SpvOpGroupNonUniformUMin; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, val_id; -+ SpvOp op; -+ -+ op = map_wave_alu_op(instruction->handler_idx, data_type_is_floating_point(src->reg.data_type)); -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); -+ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformArithmetic); -+ val_id = vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, op, type_id, -+ vkd3d_spirv_get_op_scope_subgroup(builder), -+ (instruction->flags & VKD3DSI_WAVE_PREFIX) ? SpvGroupOperationExclusiveScan : SpvGroupOperationReduce, -+ val_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ SpvGroupOperation group_op; -+ uint32_t type_id, val_id; -+ -+ group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? 
SpvGroupOperationExclusiveScan -+ : SpvGroupOperationReduce; -+ -+ val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); -+ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_ballot_bit_count(builder, type_id, group_op, val_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_wave_is_first_lane(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ uint32_t val_id; -+ -+ val_id = vkd3d_spirv_build_op_group_nonuniform_elect(builder); -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_wave_read_lane_at(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, lane_id, val_id; -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); -+ lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); -+ -+ /* TODO: detect values loaded from a const buffer? */ -+ if (register_is_constant_or_undef(&src[1].reg)) -+ { -+ /* Uniform lane_id only. */ -+ val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast(builder, type_id, val_id, lane_id); -+ } -+ else -+ { -+ /* WaveReadLaneAt supports non-uniform lane ids, so if lane_id is not constant it may not be uniform. */ -+ val_id = vkd3d_spirv_build_op_group_nonuniform_shuffle(builder, type_id, val_id, lane_id); -+ } -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_wave_read_lane_first(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, val_id; -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast_first(builder, type_id, val_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ - /* This function is called after declarations are processed. */ - static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) - { -@@ -9475,6 +10000,11 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) - - if (compiler->emit_point_size) - spirv_compiler_emit_point_size(compiler); -+ -+ /* Maybe in the future we can try to shrink the size of the interlocked -+ * section. 
*/ -+ if (compiler->use_invocation_interlock) -+ vkd3d_spirv_build_op(&compiler->spirv_builder.function_stream, SpvOpBeginInvocationInterlockEXT); - } - - static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, -@@ -9482,6 +10012,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - { - int ret = VKD3D_OK; - -+ compiler->location = instruction->location; -+ - switch (instruction->handler_idx) - { - case VKD3DSIH_DCL_GLOBAL_FLAGS: -@@ -9549,6 +10081,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - break; - case VKD3DSIH_DMOVC: - case VKD3DSIH_MOVC: -+ case VKD3DSIH_CMP: - spirv_compiler_emit_movc(compiler, instruction); - break; - case VKD3DSIH_SWAPC: -@@ -9587,6 +10120,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_ISFINITE: - spirv_compiler_emit_isfinite(compiler, instruction); - break; -+ case VKD3DSIH_ABS: - case VKD3DSIH_ACOS: - case VKD3DSIH_ASIN: - case VKD3DSIH_ATAN: -@@ -9669,6 +10203,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_ULT: - spirv_compiler_emit_comparison_instruction(compiler, instruction); - break; -+ case VKD3DSIH_ORD: -+ case VKD3DSIH_UNO: -+ spirv_compiler_emit_orderedness_instruction(compiler, instruction); -+ break; -+ case VKD3DSIH_SLT: -+ case VKD3DSIH_SGE: -+ spirv_compiler_emit_float_comparison_instruction(compiler, instruction); -+ break; - case VKD3DSIH_BFI: - case VKD3DSIH_IBFE: - case VKD3DSIH_UBFE: -@@ -9795,8 +10337,41 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_CUT_STREAM: - spirv_compiler_emit_cut_stream(compiler, instruction); - break; -+ case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: -+ case VKD3DSIH_WAVE_ALL_TRUE: -+ case VKD3DSIH_WAVE_ANY_TRUE: -+ spirv_compiler_emit_wave_bool_op(compiler, instruction); -+ break; -+ case VKD3DSIH_WAVE_ACTIVE_BALLOT: -+ spirv_compiler_emit_wave_active_ballot(compiler, instruction); -+ break; -+ case VKD3DSIH_WAVE_ACTIVE_BIT_AND: -+ case VKD3DSIH_WAVE_ACTIVE_BIT_OR: -+ case VKD3DSIH_WAVE_ACTIVE_BIT_XOR: -+ case VKD3DSIH_WAVE_OP_ADD: -+ case VKD3DSIH_WAVE_OP_IMAX: -+ case VKD3DSIH_WAVE_OP_IMIN: -+ case VKD3DSIH_WAVE_OP_MAX: -+ case VKD3DSIH_WAVE_OP_MIN: -+ case VKD3DSIH_WAVE_OP_MUL: -+ case VKD3DSIH_WAVE_OP_UMAX: -+ case VKD3DSIH_WAVE_OP_UMIN: -+ spirv_compiler_emit_wave_alu_op(compiler, instruction); -+ break; -+ case VKD3DSIH_WAVE_ALL_BIT_COUNT: -+ case VKD3DSIH_WAVE_PREFIX_BIT_COUNT: -+ spirv_compiler_emit_wave_bit_count(compiler, instruction); -+ break; -+ case VKD3DSIH_WAVE_IS_FIRST_LANE: -+ spirv_compiler_emit_wave_is_first_lane(compiler, instruction); -+ break; -+ case VKD3DSIH_WAVE_READ_LANE_AT: -+ spirv_compiler_emit_wave_read_lane_at(compiler, instruction); -+ break; -+ case VKD3DSIH_WAVE_READ_LANE_FIRST: -+ spirv_compiler_emit_wave_read_lane_first(compiler, instruction); -+ break; - case VKD3DSIH_DCL: -- case VKD3DSIH_DCL_CONSTANT_BUFFER: - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: - case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: - case VKD3DSIH_DCL_INPUT_SGV: -@@ -9892,20 +10467,19 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c - } - } - --static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, -- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, -- struct vkd3d_shader_code *spirv) -+static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct vsir_program *program, 
-+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *spirv) - { - const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; - const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; - struct vkd3d_shader_instruction_array instructions; -- struct vsir_program *program = &parser->program; -+ enum vkd3d_shader_spirv_environment environment; - enum vkd3d_result result = VKD3D_OK; - unsigned int i; - -- if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) -+ if ((result = vsir_program_normalise(program, compiler->config_flags, -+ compile_info, compiler->message_context)) < 0) - return result; - - if (program->temp_count) -@@ -9915,21 +10489,18 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - - spirv_compiler_emit_descriptor_declarations(compiler); - -- compiler->location.column = 0; -- compiler->location.line = 1; -- - if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) - return VKD3D_ERROR_OUT_OF_MEMORY; - - instructions = program->instructions; - memset(&program->instructions, 0, sizeof(program->instructions)); - -- compiler->input_signature = shader_desc->input_signature; -- compiler->output_signature = shader_desc->output_signature; -- compiler->patch_constant_signature = shader_desc->patch_constant_signature; -- memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); -- memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); -- memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); -+ compiler->input_signature = program->input_signature; -+ compiler->output_signature = program->output_signature; -+ compiler->patch_constant_signature = program->patch_constant_signature; -+ memset(&program->input_signature, 0, sizeof(program->input_signature)); -+ memset(&program->output_signature, 0, sizeof(program->output_signature)); -+ memset(&program->patch_constant_signature, 0, sizeof(program->patch_constant_signature)); - compiler->use_vocp = program->use_vocp; - compiler->block_names = program->block_names; - compiler->block_name_count = program->block_name_count; -@@ -9942,7 +10513,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - - for (i = 0; i < instructions.count && result >= 0; ++i) - { -- compiler->location.line = i + 1; - result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); - } - -@@ -9985,12 +10555,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - if (compiler->strip_debug) - vkd3d_spirv_stream_clear(&builder->debug_stream); - -- if (!vkd3d_spirv_compile_module(builder, spirv, spirv_compiler_get_entry_point_name(compiler))) -+ environment = spirv_compiler_get_target_environment(compiler); -+ if (!vkd3d_spirv_compile_module(builder, spirv, spirv_compiler_get_entry_point_name(compiler), environment)) - return VKD3D_ERROR; - -- if (TRACE_ON() || parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) -+ if (TRACE_ON() || compiler->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) - { -- enum vkd3d_shader_spirv_environment environment = spirv_compiler_get_target_environment(compiler); - struct vkd3d_string_buffer buffer; - - if (TRACE_ON()) -@@ -10018,7 +10588,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler 
*compiler, - if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT) - { - struct vkd3d_shader_code text; -- enum vkd3d_shader_spirv_environment environment = spirv_compiler_get_target_environment(compiler); - if (vkd3d_spirv_binary_to_text(spirv, environment, compiler->formatting, &text) != VKD3D_OK) - return VKD3D_ERROR; - vkd3d_shader_free_shader_code(spirv); -@@ -10028,7 +10597,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - return VKD3D_OK; - } - --int spirv_compile(struct vkd3d_shader_parser *parser, -+int spirv_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -@@ -10036,14 +10605,14 @@ int spirv_compile(struct vkd3d_shader_parser *parser, - struct spirv_compiler *spirv_compiler; - int ret; - -- if (!(spirv_compiler = spirv_compiler_create(&parser->program.shader_version, &parser->shader_desc, -- compile_info, scan_descriptor_info, message_context, &parser->location, parser->config_flags))) -+ if (!(spirv_compiler = spirv_compiler_create(program, compile_info, -+ scan_descriptor_info, message_context, config_flags))) - { - ERR("Failed to create SPIR-V compiler.\n"); - return VKD3D_ERROR; - } - -- ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); -+ ret = spirv_compiler_generate_spirv(spirv_compiler, program, compile_info, out); - - spirv_compiler_destroy(spirv_compiler); - return ret; -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 3be4e40ab0c..b562e815a81 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -719,14 +719,9 @@ static const enum vkd3d_data_type data_type_table[] = - /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, - }; - --static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) --{ -- return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); --} -- - static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) - { -- const struct vkd3d_shader_version *version = &sm4->p.program.shader_version; -+ const struct vkd3d_shader_version *version = &sm4->p.program->shader_version; - - return version->major >= 5 && version->minor >= 1; - } -@@ -811,7 +806,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui - icb->element_count = icb_size / VKD3D_VEC4_SIZE; - icb->is_null = false; - memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); -- shader_instruction_array_add_icb(&priv->p.program.instructions, icb); -+ shader_instruction_array_add_icb(&priv->p.program->instructions, icb); - ins->declaration.icb = icb; - } - -@@ -933,6 +928,7 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) - { - struct vkd3d_shader_index_range *index_range = &ins->declaration.index_range; -+ struct vsir_program *program = priv->p.program; - unsigned int i, register_idx, register_count; - const struct shader_signature *signature; - enum vkd3d_shader_register_type type; -@@ -954,32 +950,32 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins - case VKD3DSPR_INCONTROLPOINT: - io_masks = priv->input_register_masks; - ranges = &priv->input_index_ranges; -- 
signature = &priv->p.shader_desc.input_signature; -+ signature = &program->input_signature; - break; - case VKD3DSPR_OUTPUT: - if (sm4_parser_is_in_fork_or_join_phase(priv)) - { - io_masks = priv->patch_constant_register_masks; - ranges = &priv->patch_constant_index_ranges; -- signature = &priv->p.shader_desc.patch_constant_signature; -+ signature = &program->patch_constant_signature; - } - else - { - io_masks = priv->output_register_masks; - ranges = &priv->output_index_ranges; -- signature = &priv->p.shader_desc.output_signature; -+ signature = &program->output_signature; - } - break; - case VKD3DSPR_COLOROUT: - case VKD3DSPR_OUTCONTROLPOINT: - io_masks = priv->output_register_masks; - ranges = &priv->output_index_ranges; -- signature = &priv->p.shader_desc.output_signature; -+ signature = &program->output_signature; - break; - case VKD3DSPR_PATCHCONST: - io_masks = priv->patch_constant_register_masks; - ranges = &priv->patch_constant_index_ranges; -- signature = &priv->p.shader_desc.patch_constant_signature; -+ signature = &program->patch_constant_signature; - break; - - default: -@@ -1057,16 +1053,17 @@ static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction - } - - static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) - { - enum vkd3d_sm4_input_primitive_type primitive_type; -+ struct vsir_program *program = sm4->p.program; - - primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; - if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) - { - ins->declaration.primitive_type.type = VKD3D_PT_PATCH; - ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; -- priv->p.program.input_control_point_count = ins->declaration.primitive_type.patch_vertex_count; -+ program->input_control_point_count = ins->declaration.primitive_type.patch_vertex_count; - } - else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) - { -@@ -1075,7 +1072,7 @@ static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction - else - { - ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type].vkd3d_type; -- priv->p.program.input_control_point_count = input_primitive_type_table[primitive_type].control_point_count; -+ program->input_control_point_count = input_primitive_type_table[primitive_type].control_point_count; - } - - if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) -@@ -1083,11 +1080,13 @@ static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction - } - - static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) - { -+ struct vsir_program *program = sm4->p.program; -+ - ins->declaration.count = *tokens; - if (opcode == VKD3D_SM4_OP_DCL_TEMPS) -- priv->p.program.temp_count = max(priv->p.program.temp_count, *tokens); -+ program->temp_count = max(program->temp_count, *tokens); - } - - static void shader_sm4_read_declaration_dst(struct 
vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1113,7 +1112,7 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u - if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) - { - struct signature_element *e = vsir_signature_find_element_for_reg( -- &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); -+ &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); - - e->interpolation_mode = ins->flags; - } -@@ -1128,7 +1127,7 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in - if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) - { - struct signature_element *e = vsir_signature_find_element_for_reg( -- &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); -+ &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); - - e->interpolation_mode = ins->flags; - } -@@ -1183,15 +1182,17 @@ static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, - } - - static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) - { -+ struct vsir_program *program = sm4->p.program; -+ - ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) - >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; - - if (opcode == VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT) -- priv->p.program.input_control_point_count = ins->declaration.count; -+ program->input_control_point_count = ins->declaration.count; - else -- priv->p.program.output_control_point_count = ins->declaration.count; -+ program->output_control_point_count = ins->declaration.count; - } - - static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1263,6 +1264,7 @@ static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, u - ins->declaration.tgsm_raw.byte_count = *tokens; - if (ins->declaration.tgsm_raw.byte_count % 4) - FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); -+ ins->declaration.tgsm_raw.zero_init = false; - } - - static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1274,6 +1276,7 @@ static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction - ins->declaration.tgsm_structured.structure_count = *tokens; - if (ins->declaration.tgsm_structured.byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); -+ ins->declaration.tgsm_structured.zero_init = false; - } - - static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1743,21 +1746,12 @@ static enum vkd3d_data_type map_data_type(char t) - } - } - --static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) --{ -- struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); -- -- vsir_program_cleanup(&parser->program); -- free_shader_desc(&parser->shader_desc); -- vkd3d_free(sm4); --} -- - static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const 
uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) - { - if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) - { -- struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(&priv->p.program, 1); -+ struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(priv->p.program, 1); - - if (!(reg_idx->rel_addr = rel_addr)) - { -@@ -2035,7 +2029,7 @@ static bool register_is_control_point_input(const struct vkd3d_shader_register * - { - return reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT - || (reg->type == VKD3DSPR_INPUT && (priv->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE -- || priv->p.program.shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY)); -+ || priv->p.program->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY)); - } - - static uint32_t mask_from_swizzle(uint32_t swizzle) -@@ -2359,7 +2353,7 @@ static void shader_sm4_read_instruction_modifier(uint32_t modifier, struct vkd3d - static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_instruction *ins) - { - const struct vkd3d_sm4_opcode_info *opcode_info; -- struct vsir_program *program = &sm4->p.program; -+ struct vsir_program *program = sm4->p.program; - uint32_t opcode_token, opcode, previous_token; - struct vkd3d_shader_dst_param *dst_params; - struct vkd3d_shader_src_param *src_params; -@@ -2498,13 +2492,8 @@ fail: - return; - } - --static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = --{ -- .parser_destroy = shader_sm4_destroy, --}; -- --static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, -- size_t byte_code_size, const char *source_name, const struct shader_signature *output_signature, -+static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_program *program, -+ const uint32_t *byte_code, size_t byte_code_size, const char *source_name, - struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_version version; -@@ -2563,9 +2552,9 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t - version.minor = VKD3D_SM4_VERSION_MINOR(version_token); - - /* Estimate instruction count to avoid reallocation in most shaders. 
*/ -- if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, -- token_count / 7u + 20)) -+ if (!vsir_program_init(program, &version, token_count / 7u + 20)) - return false; -+ vkd3d_shader_parser_init(&sm4->p, program, message_context, source_name); - sm4->ptr = sm4->start; - - init_sm4_lookup_tables(&sm4->lookup); -@@ -2644,94 +2633,88 @@ static void shader_sm4_validate_default_phase_index_ranges(struct vkd3d_shader_s - return; - } - --int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) -+int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, -+ struct vkd3d_shader_message_context *message_context, struct vsir_program *program) - { - struct vkd3d_shader_instruction_array *instructions; -- struct vkd3d_shader_desc *shader_desc; -+ struct vkd3d_shader_sm4_parser sm4 = {0}; -+ struct dxbc_shader_desc dxbc_desc = {0}; - struct vkd3d_shader_instruction *ins; -- struct vkd3d_shader_sm4_parser *sm4; - int ret; - -- if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) -- { -- ERR("Failed to allocate parser.\n"); -- return VKD3D_ERROR_OUT_OF_MEMORY; -- } -- -- shader_desc = &sm4->p.shader_desc; -- shader_desc->is_dxil = false; -+ dxbc_desc.is_dxil = false; - if ((ret = shader_extract_from_dxbc(&compile_info->source, -- message_context, compile_info->source_name, shader_desc)) < 0) -+ message_context, compile_info->source_name, &dxbc_desc)) < 0) - { - WARN("Failed to extract shader, vkd3d result %d.\n", ret); -- vkd3d_free(sm4); - return ret; - } - -- if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, -- compile_info->source_name, &shader_desc->output_signature, message_context)) -+ if (!shader_sm4_init(&sm4, program, dxbc_desc.byte_code, dxbc_desc.byte_code_size, -+ compile_info->source_name, message_context)) - { - WARN("Failed to initialise shader parser.\n"); -- free_shader_desc(shader_desc); -- vkd3d_free(sm4); -+ free_dxbc_shader_desc(&dxbc_desc); - return VKD3D_ERROR_INVALID_ARGUMENT; - } - -+ program->input_signature = dxbc_desc.input_signature; -+ program->output_signature = dxbc_desc.output_signature; -+ program->patch_constant_signature = dxbc_desc.patch_constant_signature; -+ memset(&dxbc_desc, 0, sizeof(dxbc_desc)); -+ - /* DXBC stores used masks inverted for output signatures, for some reason. - * We return them un-inverted. 
*/ -- uninvert_used_masks(&shader_desc->output_signature); -- if (sm4->p.program.shader_version.type == VKD3D_SHADER_TYPE_HULL) -- uninvert_used_masks(&shader_desc->patch_constant_signature); -- -- if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature, -- sm4->input_register_masks, "Input") -- || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature, -- sm4->output_register_masks, "Output") -- || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature, -- sm4->patch_constant_register_masks, "Patch constant")) -- { -- shader_sm4_destroy(&sm4->p); -+ uninvert_used_masks(&program->output_signature); -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) -+ uninvert_used_masks(&program->patch_constant_signature); -+ -+ if (!shader_sm4_parser_validate_signature(&sm4, &program->input_signature, -+ sm4.input_register_masks, "Input") -+ || !shader_sm4_parser_validate_signature(&sm4, &program->output_signature, -+ sm4.output_register_masks, "Output") -+ || !shader_sm4_parser_validate_signature(&sm4, &program->patch_constant_signature, -+ sm4.patch_constant_register_masks, "Patch constant")) -+ { -+ vsir_program_cleanup(program); - return VKD3D_ERROR_INVALID_SHADER; - } - -- instructions = &sm4->p.program.instructions; -- while (sm4->ptr != sm4->end) -+ instructions = &program->instructions; -+ while (sm4.ptr != sm4.end) - { - if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) - { - ERR("Failed to allocate instructions.\n"); -- vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- shader_sm4_destroy(&sm4->p); -+ vkd3d_shader_parser_error(&sm4.p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -+ vsir_program_cleanup(program); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ins = &instructions->elements[instructions->count]; -- shader_sm4_read_instruction(sm4, ins); -+ shader_sm4_read_instruction(&sm4, ins); - - if (ins->handler_idx == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); -- shader_sm4_destroy(&sm4->p); -+ vsir_program_cleanup(program); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ++instructions->count; - } -- if (sm4->p.program.shader_version.type == VKD3D_SHADER_TYPE_HULL -- && !sm4->has_control_point_phase && !sm4->p.failed) -- shader_sm4_validate_default_phase_index_ranges(sm4); -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL -+ && !sm4.has_control_point_phase && !sm4.p.failed) -+ shader_sm4_validate_default_phase_index_ranges(&sm4); - -- if (!sm4->p.failed) -- vsir_validate(&sm4->p); -+ if (!sm4.p.failed) -+ vkd3d_shader_parser_validate(&sm4.p, config_flags); - -- if (sm4->p.failed) -+ if (sm4.p.failed) - { - WARN("Failed to parse shader.\n"); -- shader_sm4_destroy(&sm4->p); -+ vsir_program_cleanup(program); - return VKD3D_ERROR_INVALID_SHADER; - } - -- *parser = &sm4->p; -- - return VKD3D_OK; - } - -@@ -2739,7 +2722,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc - - static bool type_is_integer(const struct hlsl_type *type) - { -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: -@@ -2928,7 +2911,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, usage_idx); - put_u32(&buffer, usage); -- switch (var->data_type->base_type) -+ switch (var->data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case 
HLSL_TYPE_HALF: -@@ -2989,31 +2972,39 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - { - switch (type->class) - { -- case HLSL_CLASS_ARRAY: -- return sm4_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3D_SVC_MATRIX_COLUMNS; - else - return D3D_SVC_MATRIX_ROWS; -- case HLSL_CLASS_OBJECT: -- return D3D_SVC_OBJECT; - case HLSL_CLASS_SCALAR: - return D3D_SVC_SCALAR; -- case HLSL_CLASS_STRUCT: -- return D3D_SVC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3D_SVC_VECTOR; -- default: -- ERR("Invalid class %#x.\n", type->class); -- vkd3d_unreachable(); -+ -+ case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_STRUCT: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_VOID: -+ break; - } -+ vkd3d_unreachable(); - } - - static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) - { -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - return D3D_SVT_BOOL; -@@ -3024,68 +3015,8 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) - return D3D_SVT_FLOAT; - case HLSL_TYPE_INT: - return D3D_SVT_INT; -- case HLSL_TYPE_PIXELSHADER: -- return D3D_SVT_PIXELSHADER; -- case HLSL_TYPE_SAMPLER: -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3D_SVT_SAMPLER1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3D_SVT_SAMPLER2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3D_SVT_SAMPLER3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return D3D_SVT_SAMPLERCUBE; -- case HLSL_SAMPLER_DIM_GENERIC: -- return D3D_SVT_SAMPLER; -- default: -- vkd3d_unreachable(); -- } -- break; -- case HLSL_TYPE_STRING: -- return D3D_SVT_STRING; -- case HLSL_TYPE_TEXTURE: -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3D_SVT_TEXTURE1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3D_SVT_TEXTURE2D; -- case HLSL_SAMPLER_DIM_2DMS: -- return D3D_SVT_TEXTURE2DMS; -- case HLSL_SAMPLER_DIM_3D: -- return D3D_SVT_TEXTURE3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return D3D_SVT_TEXTURECUBE; -- case HLSL_SAMPLER_DIM_GENERIC: -- return D3D_SVT_TEXTURE; -- default: -- vkd3d_unreachable(); -- } -- break; - case HLSL_TYPE_UINT: - return D3D_SVT_UINT; -- case HLSL_TYPE_VERTEXSHADER: -- return D3D_SVT_VERTEXSHADER; -- case HLSL_TYPE_VOID: -- return D3D_SVT_VOID; -- case HLSL_TYPE_UAV: -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3D_SVT_RWTEXTURE1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3D_SVT_RWTEXTURE2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3D_SVT_RWTEXTURE3D; -- case HLSL_SAMPLER_DIM_1DARRAY: -- return D3D_SVT_RWTEXTURE1DARRAY; -- case HLSL_SAMPLER_DIM_2DARRAY: -- return D3D_SVT_RWTEXTURE2DARRAY; -- default: -- vkd3d_unreachable(); -- } - default: - vkd3d_unreachable(); - } -@@ -3096,8 +3027,8 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); - const char *name = array_type->name ? 
array_type->name : ""; - const struct hlsl_profile_info *profile = ctx->profile; -- unsigned int field_count = 0, array_size = 0; -- size_t fields_offset = 0, name_offset = 0; -+ unsigned int array_size = 0; -+ size_t name_offset = 0; - size_t i; - - if (type->bytecode_offset) -@@ -3111,32 +3042,47 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - - if (array_type->class == HLSL_CLASS_STRUCT) - { -- field_count = array_type->e.record.field_count; -+ unsigned int field_count = 0; -+ size_t fields_offset = 0; - -- for (i = 0; i < field_count; ++i) -+ for (i = 0; i < array_type->e.record.field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - -+ if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) -+ continue; -+ - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm4_type(ctx, buffer, field->type); -+ ++field_count; - } - - fields_offset = bytecode_align(buffer); - -- for (i = 0; i < field_count; ++i) -+ for (i = 0; i < array_type->e.record.field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - -+ if (!field->type->reg_size[HLSL_REGSET_NUMERIC]) -+ continue; -+ - put_u32(buffer, field->name_bytecode_offset); - put_u32(buffer, field->type->bytecode_offset); -- put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); -+ put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float)); - } -+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); -+ put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); -+ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -+ put_u32(buffer, fields_offset); -+ } -+ else -+ { -+ assert(array_type->class <= HLSL_CLASS_LAST_NUMERIC); -+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); -+ put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx)); -+ put_u32(buffer, vkd3d_make_u32(array_size, 0)); -+ put_u32(buffer, 1); - } -- -- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); -- put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); -- put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -- put_u32(buffer, fields_offset); - - if (profile->major_version >= 5) - { -@@ -3150,20 +3096,21 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - - static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) - { -- if (type->class == HLSL_CLASS_ARRAY) -- return sm4_resource_type(type->e.array.type); -- -- switch (type->base_type) -+ switch (type->class) - { -- case HLSL_TYPE_SAMPLER: -+ case HLSL_CLASS_ARRAY: -+ return sm4_resource_type(type->e.array.type); -+ case HLSL_CLASS_SAMPLER: - return D3D_SIT_SAMPLER; -- case HLSL_TYPE_TEXTURE: -+ case HLSL_CLASS_TEXTURE: - return D3D_SIT_TEXTURE; -- case HLSL_TYPE_UAV: -+ case HLSL_CLASS_UAV: - return D3D_SIT_UAV_RWTYPED; - default: -- vkd3d_unreachable(); -+ break; - } -+ -+ vkd3d_unreachable(); - } - - static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) -@@ -3171,7 +3118,7 @@ static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type - if (type->class == HLSL_CLASS_ARRAY) - return sm4_resource_format(type->e.array.type); - -- switch (type->e.resource.format->base_type) -+ switch (type->e.resource.format->e.numeric.type) - { - case HLSL_TYPE_DOUBLE: - return D3D_RETURN_TYPE_DOUBLE; -@@ -3328,7 +3275,7 @@ static struct 
extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - - extern_resources[*count].name = name; - extern_resources[*count].data_type = component_type; -- extern_resources[*count].is_user_packed = false; -+ extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; - - extern_resources[*count].regset = regset; - extern_resources[*count].id = var->regs[regset].id + regset_offset; -@@ -3428,10 +3375,10 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - if (profile->major_version >= 5) - { -- put_u32(&buffer, TAG_RD11); -+ put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); - put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ - put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ -- put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ -+ put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t)); /* size of binding desc */ - put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ - put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ - put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ -@@ -3448,6 +3395,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - const struct extern_resource *resource = &extern_resources[i]; - uint32_t flags = 0; - -+ if (hlsl_version_ge(ctx, 5, 1)) -+ hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); -+ - if (resource->is_user_packed) - flags |= D3D_SIF_USERPACKED; - -@@ -3480,6 +3430,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - if (!cbuffer->reg.allocated) - continue; - -+ if (hlsl_version_ge(ctx, 5, 1)) -+ hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); -+ - if (cbuffer->reservation.reg_type) - flags |= D3D_SIF_USERPACKED; - -@@ -3523,8 +3476,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_uniform && var->buffer == cbuffer -- && var->data_type->class != HLSL_CLASS_OBJECT) -+ if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) - ++var_count; - } - -@@ -3558,8 +3510,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_uniform && var->buffer == cbuffer -- && var->data_type->class != HLSL_CLASS_OBJECT) -+ if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) - { - uint32_t flags = 0; - -@@ -3586,8 +3537,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - j = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_uniform && var->buffer == cbuffer -- && var->data_type->class != HLSL_CLASS_OBJECT) -+ if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC]) - { - const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); - size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); -@@ -4598,7 +4548,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node - enum hlsl_sampler_dim dim) - { - const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); -- bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE -+ bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE - && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); - bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); - unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; -@@ -4756,11 +4706,11 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl - const struct hlsl_ir_node *dst = &load->node; - struct sm4_instruction instr; - -- assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); -+ assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -- if (dst->data_type->base_type == HLSL_TYPE_UINT) -+ if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) - instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - - sm4_dst_from_node(&instr.dsts[0], dst); -@@ -4785,11 +4735,11 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir - return; - } - -- assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); -+ assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_RESINFO; -- if (dst->data_type->base_type == HLSL_TYPE_UINT) -+ if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) - instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - - sm4_dst_from_node(&instr.dsts[0], dst); -@@ -4804,7 +4754,7 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir - - static bool type_is_float(const struct hlsl_type *type) - { -- return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; -+ return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; - } - - static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, -@@ -4841,11 +4791,11 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex - /* Narrowing casts were already lowered. 
*/ - assert(src_type->dimx == dst_type->dimx); - -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -@@ -4874,7 +4824,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_TYPE_INT: -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -@@ -4900,7 +4850,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_TYPE_UINT: -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -@@ -4970,7 +4920,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - switch (expr->op) - { - case HLSL_OP1_ABS: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_ABS); -@@ -5051,12 +5001,12 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP1_LOGIC_NOT: -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_NEG: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_NEG); -@@ -5109,7 +5059,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_ADD: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); -@@ -5141,7 +5091,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_DIV: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); -@@ -5157,7 +5107,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_DOT: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - switch (arg1->data_type->dimx) -@@ -5189,9 +5139,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); -@@ -5215,9 +5165,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); -@@ -5244,9 +5194,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = 
arg1->data_type; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); -@@ -5270,23 +5220,23 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - } - - case HLSL_OP2_LOGIC_AND: -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LOGIC_OR: -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LSHIFT: - assert(type_is_integer(dst_type)); -- assert(dst_type->base_type != HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_MAX: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); -@@ -5306,7 +5256,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_MIN: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); -@@ -5326,7 +5276,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_MOD: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_UINT: - write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); -@@ -5338,7 +5288,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_MUL: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); -@@ -5360,9 +5310,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); -@@ -5384,12 +5334,12 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - - case HLSL_OP2_RSHIFT: - assert(type_is_integer(dst_type)); -- assert(dst_type->base_type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, -+ assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -+ write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, - &expr->node, arg1, arg2); - break; - -- case HLSL_OP3_MOVC: -+ case HLSL_OP3_TERNARY: - write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); - break; - -@@ -5445,7 +5395,8 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju - - case HLSL_IR_JUMP_DISCARD_NZ: - { -- instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; -+ instr.opcode = VKD3D_SM4_OP_DISCARD; -+ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; - - memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); - instr.src_count = 1; -@@ -5486,7 +5437,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo - instr.dst_count = 1; - - assert(hlsl_is_numeric_type(type)); -- if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) -+ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) - { - struct hlsl_constant_value value; - -@@ -5746,18 +5697,12 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc - { - if (instr->data_type) - { -- if (instr->data_type->class == HLSL_CLASS_MATRIX) -+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) - { -- hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); -+ hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.", -+ instr->data_type->class); - break; - } -- else if (instr->data_type->class == HLSL_CLASS_OBJECT) -- { -- hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); -- break; -- } -- -- assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); - - if (!instr->reg.allocated) - { -@@ -5854,13 +5799,21 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -+ { -+ if (hlsl_version_ge(ctx, 5, 1)) -+ hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); -+ - write_sm4_dcl_constant_buffer(&tpf, cbuffer); -+ } - } - - for (i = 0; i < extern_resources_count; ++i) - { - const struct extern_resource *resource = &extern_resources[i]; - -+ if (hlsl_version_ge(ctx, 5, 1)) -+ hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); -+ - if (resource->regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(&tpf, resource); - else if (resource->regset == HLSL_REGSET_TEXTURES) -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 4f400d19f6f..b8dd0dba377 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -23,6 +23,8 @@ - #include - #include - -+/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ -+ - static inline int char_to_int(char c) - { - if ('0' <= c && c <= '9') -@@ -71,8 +73,16 @@ void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer) - - void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer) - { -- buffer->buffer[0] = '\0'; -- buffer->content_size = 0; -+ vkd3d_string_buffer_truncate(buffer, 0); -+} -+ -+void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size) -+{ -+ if (size < buffer->content_size) -+ { -+ buffer->buffer[size] = '\0'; -+ buffer->content_size = size; -+ } - } - - static bool vkd3d_string_buffer_resize(struct vkd3d_string_buffer *buffer, int rc) -@@ -224,6 +234,16 @@ void 
vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *cache, struct - cache->buffers[cache->count++] = buffer; - } - -+void vkd3d_shader_code_from_string_buffer(struct vkd3d_shader_code *code, struct vkd3d_string_buffer *buffer) -+{ -+ code->code = buffer->buffer; -+ code->size = buffer->content_size; -+ -+ buffer->buffer = NULL; -+ buffer->buffer_size = 0; -+ buffer->content_size = 0; -+} -+ - void vkd3d_shader_message_context_init(struct vkd3d_shader_message_context *context, - enum vkd3d_shader_log_level log_level) - { -@@ -520,7 +540,7 @@ static const struct vkd3d_debug_option vkd3d_shader_config_options[] = - {"force_validation", VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION}, /* force validation of internal shader representations */ - }; - --static uint64_t vkd3d_shader_init_config_flags(void) -+uint64_t vkd3d_shader_init_config_flags(void) - { - uint64_t config_flags; - const char *config; -@@ -534,18 +554,14 @@ static uint64_t vkd3d_shader_init_config_flags(void) - return config_flags; - } - --bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, -- struct vkd3d_shader_message_context *message_context, const char *source_name, -- const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, -- unsigned int instruction_reserve) -+void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context, const char *source_name) - { - parser->message_context = message_context; - parser->location.source_name = source_name; - parser->location.line = 1; - parser->location.column = 0; -- parser->ops = ops; -- parser->config_flags = vkd3d_shader_init_config_flags(); -- return vsir_program_init(&parser->program, version, instruction_reserve); -+ parser->program = program; - } - - void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, -@@ -1375,9 +1391,9 @@ static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_des - vkd3d_free(scan_descriptor_info->descriptors); - } - --static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, -+static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, -- struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) -+ struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) - { - struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; - struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; -@@ -1408,27 +1424,27 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - descriptor_info1 = &local_descriptor_info1; - } - -- vkd3d_shader_scan_context_init(&context, &parser->program.shader_version, compile_info, -+ vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, - descriptor_info1, combined_sampler_info, message_context); - - if (TRACE_ON()) -- vkd3d_shader_trace(&parser->program); -+ vkd3d_shader_trace(program); - -- for (i = 0; i < parser->program.instructions.count; ++i) -+ for (i = 0; i < program->instructions.count; ++i) - { -- instruction = &parser->program.instructions.elements[i]; -+ instruction = &program->instructions.elements[i]; - if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) - break; - } - -- for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) -+ for (i = 0; 
i < ARRAY_SIZE(program->flat_constant_count); ++i) - { -- unsigned int size = parser->shader_desc.flat_constant_count[i].external; - struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; - struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; -+ unsigned int size = program->flat_constant_count[i]; - struct vkd3d_shader_descriptor_info1 *d; - -- if (parser->shader_desc.flat_constant_count[i].external) -+ if (size) - { - if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ®, - &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) -@@ -1438,11 +1454,11 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - - if (!ret && signature_info) - { -- if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) -+ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &program->input_signature) - || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, -- &parser->shader_desc.output_signature) -+ &program->output_signature) - || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, -- &parser->shader_desc.patch_constant_signature)) -+ &program->patch_constant_signature)) - { - ret = VKD3D_ERROR_OUT_OF_MEMORY; - } -@@ -1470,60 +1486,6 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - return ret; - } - --static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) -- { -- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = scan_with_parser(compile_info, message_context, NULL, parser); -- vkd3d_shader_parser_destroy(parser); -- -- return ret; --} -- --static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) -- { -- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = scan_with_parser(compile_info, message_context, NULL, parser); -- vkd3d_shader_parser_destroy(parser); -- -- return ret; --} -- --static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) -- { -- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = scan_with_parser(compile_info, message_context, NULL, parser); -- vkd3d_shader_parser_destroy(parser); -- -- return ret; --} -- - int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) - { - struct vkd3d_shader_message_context message_context; -@@ -1543,29 +1505,45 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - - vkd3d_shader_dump_shader(compile_info); - -- switch (compile_info->source_type) -+ if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) - { -- case VKD3D_SHADER_SOURCE_DXBC_TPF: -- ret = scan_dxbc(compile_info, &message_context); -- break; -+ FIXME("HLSL support not implemented.\n"); -+ ret = 
VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ else -+ { -+ uint64_t config_flags = vkd3d_shader_init_config_flags(); -+ struct vsir_program program; - -- case VKD3D_SHADER_SOURCE_HLSL: -- FIXME("HLSL support not implemented.\n"); -- ret = VKD3D_ERROR_NOT_IMPLEMENTED; -- break; -+ switch (compile_info->source_type) -+ { -+ case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -+ ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); -+ break; - -- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -- ret = scan_d3dbc(compile_info, &message_context); -- break; -+ case VKD3D_SHADER_SOURCE_DXBC_TPF: -+ ret = tpf_parse(compile_info, config_flags, &message_context, &program); -+ break; - -- case VKD3D_SHADER_SOURCE_DXBC_DXIL: -- ret = scan_dxil(compile_info, &message_context); -- break; -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = dxil_parse(compile_info, config_flags, &message_context, &program); -+ break; - -- default: -- ERR("Unsupported source type %#x.\n", compile_info->source_type); -- ret = VKD3D_ERROR_INVALID_ARGUMENT; -- break; -+ default: -+ ERR("Unsupported source type %#x.\n", compile_info->source_type); -+ ret = VKD3D_ERROR_INVALID_ARGUMENT; -+ break; -+ } -+ -+ if (ret < 0) -+ { -+ WARN("Failed to parse shader.\n"); -+ } -+ else -+ { -+ ret = vsir_program_scan(&program, compile_info, &message_context, NULL); -+ vsir_program_cleanup(&program); -+ } - } - - vkd3d_shader_message_context_trace_messages(&message_context); -@@ -1575,12 +1553,11 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - return ret; - } - --static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -+int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -+ struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; -- struct vkd3d_glsl_generator *glsl_generator; - struct vkd3d_shader_compile_info scan_info; - int ret; - -@@ -1589,30 +1566,22 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - switch (compile_info->target_type) - { - case VKD3D_SHADER_TARGET_D3D_ASM: -- ret = vkd3d_dxbc_binary_to_text(&parser->program, compile_info, out, VSIR_ASM_D3D); -+ ret = d3d_asm_compile(program, compile_info, out, VSIR_ASM_FLAG_NONE); - break; - - case VKD3D_SHADER_TARGET_GLSL: -- if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) -+ if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) - return ret; -- if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->program.shader_version, -- message_context, &parser->location))) -- { -- ERR("Failed to create GLSL generator.\n"); -- vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); -- return VKD3D_ERROR; -- } -- -- ret = vkd3d_glsl_generator_generate(glsl_generator, &parser->program, out); -- vkd3d_glsl_generator_destroy(glsl_generator); -+ ret = glsl_compile(program, config_flags, compile_info, out, message_context); - vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - break; - - case VKD3D_SHADER_TARGET_SPIRV_BINARY: - case VKD3D_SHADER_TARGET_SPIRV_TEXT: -- if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) -+ if ((ret = 
vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) - return ret; -- ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); -+ ret = spirv_compile(program, config_flags, &scan_descriptor_info, -+ compile_info, out, message_context); - vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - break; - -@@ -1624,24 +1593,6 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - return ret; - } - --static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) -- { -- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); -- -- vkd3d_shader_parser_destroy(parser); -- return ret; --} -- - static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { -@@ -1657,42 +1608,6 @@ static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, - return ret; - } - --static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) -- { -- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); -- -- vkd3d_shader_parser_destroy(parser); -- return ret; --} -- --static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) --{ -- struct vkd3d_shader_parser *parser; -- int ret; -- -- if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) -- { -- WARN("Failed to initialise shader parser.\n"); -- return ret; -- } -- -- ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); -- -- vkd3d_shader_parser_destroy(parser); -- return ret; --} -- - int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, char **messages) - { -@@ -1713,26 +1628,44 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - - vkd3d_shader_dump_shader(compile_info); - -- switch (compile_info->source_type) -+ if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) - { -- case VKD3D_SHADER_SOURCE_DXBC_TPF: -- ret = compile_dxbc_tpf(compile_info, out, &message_context); -- break; -+ ret = compile_hlsl(compile_info, out, &message_context); -+ } -+ else -+ { -+ uint64_t config_flags = vkd3d_shader_init_config_flags(); -+ struct vsir_program program; - -- case VKD3D_SHADER_SOURCE_HLSL: -- ret = compile_hlsl(compile_info, out, &message_context); -- break; -+ switch (compile_info->source_type) -+ { -+ case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -+ ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); -+ break; - -- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -- ret = compile_d3d_bytecode(compile_info, out, &message_context); -- break; -+ case VKD3D_SHADER_SOURCE_DXBC_TPF: -+ ret = 
tpf_parse(compile_info, config_flags, &message_context, &program); -+ break; - -- case VKD3D_SHADER_SOURCE_DXBC_DXIL: -- ret = compile_dxbc_dxil(compile_info, out, &message_context); -- break; -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = dxil_parse(compile_info, config_flags, &message_context, &program); -+ break; - -- default: -- vkd3d_unreachable(); -+ default: -+ ERR("Unsupported source type %#x.\n", compile_info->source_type); -+ ret = VKD3D_ERROR_INVALID_ARGUMENT; -+ break; -+ } -+ -+ if (ret < 0) -+ { -+ WARN("Failed to parse shader.\n"); -+ } -+ else -+ { -+ ret = vsir_program_compile(&program, config_flags, compile_info, out, &message_context); -+ vsir_program_cleanup(&program); -+ } - } - - vkd3d_shader_message_context_trace_messages(&message_context); -@@ -1937,13 +1870,18 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - VKD3D_SHADER_TARGET_SPIRV_TEXT, - #endif - VKD3D_SHADER_TARGET_D3D_ASM, --#if 0 -+#ifdef VKD3D_SHADER_UNSUPPORTED_GLSL - VKD3D_SHADER_TARGET_GLSL, - #endif - }; - - static const enum vkd3d_shader_target_type hlsl_types[] = - { -+ VKD3D_SHADER_TARGET_SPIRV_BINARY, -+#ifdef HAVE_SPIRV_TOOLS -+ VKD3D_SHADER_TARGET_SPIRV_TEXT, -+#endif -+ VKD3D_SHADER_TARGET_D3D_ASM, - VKD3D_SHADER_TARGET_D3D_BYTECODE, - VKD3D_SHADER_TARGET_DXBC_TPF, - VKD3D_SHADER_TARGET_FX, -@@ -1958,13 +1896,21 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - VKD3D_SHADER_TARGET_D3D_ASM, - }; - -+#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -+ static const enum vkd3d_shader_target_type dxbc_dxil_types[] = -+ { -+ VKD3D_SHADER_TARGET_SPIRV_BINARY, -+# ifdef HAVE_SPIRV_TOOLS -+ VKD3D_SHADER_TARGET_SPIRV_TEXT, -+# endif -+ VKD3D_SHADER_TARGET_D3D_ASM, -+ }; -+#endif -+ - TRACE("source_type %#x, count %p.\n", source_type, count); - - switch (source_type) - { --#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -- case VKD3D_SHADER_SOURCE_DXBC_DXIL: --#endif - case VKD3D_SHADER_SOURCE_DXBC_TPF: - *count = ARRAY_SIZE(dxbc_tpf_types); - return dxbc_tpf_types; -@@ -1977,6 +1923,12 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - *count = ARRAY_SIZE(d3dbc_types); - return d3dbc_types; - -+#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ *count = ARRAY_SIZE(dxbc_dxil_types); -+ return dxbc_dxil_types; -+#endif -+ - default: - *count = 0; - return NULL; -@@ -2050,7 +2002,7 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, - { - void *params; - -- if (count > allocator->count - allocator->index) -+ if (!allocator->current || count > allocator->count - allocator->index) - { - struct vkd3d_shader_param_node *next; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 2d3b3254638..29b8d6ad022 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -100,6 +100,7 @@ enum vkd3d_shader_error - - VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300, - VKD3D_SHADER_WARNING_SPV_INVALID_UAV_FLAGS = 2301, -+ VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG = 2302, - - VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, - VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, -@@ -148,6 +149,8 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, - VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, - VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, -+ VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, -+ 
VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, - - VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, - VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, -@@ -199,6 +202,7 @@ enum vkd3d_shader_error - VKD3D_SHADER_WARNING_DXIL_INVALID_MASK = 8307, - VKD3D_SHADER_WARNING_DXIL_INVALID_OPERATION = 8308, - VKD3D_SHADER_WARNING_DXIL_IGNORING_ATTACHMENT = 8309, -+ VKD3D_SHADER_WARNING_DXIL_UNDEFINED_OPERAND = 8310, - - VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, - VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER = 9001, -@@ -218,6 +222,8 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX = 9015, - VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW = 9016, - VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, -+ VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION = 9018, -+ VKD3D_SHADER_ERROR_VSIR_INVALID_GS = 9019, - - VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, - }; -@@ -445,6 +451,7 @@ enum vkd3d_shader_opcode - VKD3DSIH_NOT, - VKD3DSIH_NRM, - VKD3DSIH_OR, -+ VKD3DSIH_ORD, - VKD3DSIH_PHASE, - VKD3DSIH_PHI, - VKD3DSIH_POW, -@@ -516,10 +523,31 @@ enum vkd3d_shader_opcode - VKD3DSIH_UMAX, - VKD3DSIH_UMIN, - VKD3DSIH_UMUL, -+ VKD3DSIH_UNO, - VKD3DSIH_USHR, - VKD3DSIH_UTOD, - VKD3DSIH_UTOF, - VKD3DSIH_UTOU, -+ VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL, -+ VKD3DSIH_WAVE_ACTIVE_BALLOT, -+ VKD3DSIH_WAVE_ACTIVE_BIT_AND, -+ VKD3DSIH_WAVE_ACTIVE_BIT_OR, -+ VKD3DSIH_WAVE_ACTIVE_BIT_XOR, -+ VKD3DSIH_WAVE_ALL_BIT_COUNT, -+ VKD3DSIH_WAVE_ALL_TRUE, -+ VKD3DSIH_WAVE_ANY_TRUE, -+ VKD3DSIH_WAVE_IS_FIRST_LANE, -+ VKD3DSIH_WAVE_OP_ADD, -+ VKD3DSIH_WAVE_OP_IMAX, -+ VKD3DSIH_WAVE_OP_IMIN, -+ VKD3DSIH_WAVE_OP_MAX, -+ VKD3DSIH_WAVE_OP_MIN, -+ VKD3DSIH_WAVE_OP_MUL, -+ VKD3DSIH_WAVE_OP_UMAX, -+ VKD3DSIH_WAVE_OP_UMIN, -+ VKD3DSIH_WAVE_PREFIX_BIT_COUNT, -+ VKD3DSIH_WAVE_READ_LANE_AT, -+ VKD3DSIH_WAVE_READ_LANE_FIRST, - VKD3DSIH_XOR, - - VKD3DSIH_INVALID, -@@ -583,6 +611,8 @@ enum vkd3d_shader_register_type - VKD3DSPR_OUTSTENCILREF, - VKD3DSPR_UNDEF, - VKD3DSPR_SSA, -+ VKD3DSPR_WAVELANECOUNT, -+ VKD3DSPR_WAVELANEINDEX, - - VKD3DSPR_COUNT, - -@@ -620,14 +650,16 @@ enum vkd3d_data_type - VKD3D_DATA_UINT8, - VKD3D_DATA_UINT64, - VKD3D_DATA_BOOL, -+ VKD3D_DATA_UINT16, -+ VKD3D_DATA_HALF, - - VKD3D_DATA_COUNT, - }; - - static inline bool data_type_is_integer(enum vkd3d_data_type data_type) - { -- return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT -- || data_type == VKD3D_DATA_UINT64; -+ return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT16 -+ || data_type == VKD3D_DATA_UINT || data_type == VKD3D_DATA_UINT64; - } - - static inline bool data_type_is_bool(enum vkd3d_data_type data_type) -@@ -635,6 +667,11 @@ static inline bool data_type_is_bool(enum vkd3d_data_type data_type) - return data_type == VKD3D_DATA_BOOL; - } - -+static inline bool data_type_is_floating_point(enum vkd3d_data_type data_type) -+{ -+ return data_type == VKD3D_DATA_HALF || data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; -+} -+ - static inline bool data_type_is_64_bit(enum vkd3d_data_type data_type) - { - return data_type == VKD3D_DATA_DOUBLE || data_type == VKD3D_DATA_UINT64; -@@ -749,11 +786,21 @@ enum vkd3d_shader_uav_flags - VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100, - }; - -+enum vkd3d_shader_atomic_rmw_flags -+{ -+ VKD3DARF_SEQ_CST = 0x1, -+ VKD3DARF_VOLATILE = 0x2, -+}; -+ - enum vkd3d_tessellator_domain - { -+ VKD3D_TESSELLATOR_DOMAIN_INVALID = 0, -+ - VKD3D_TESSELLATOR_DOMAIN_LINE = 1, - VKD3D_TESSELLATOR_DOMAIN_TRIANGLE = 2, - 
VKD3D_TESSELLATOR_DOMAIN_QUAD = 3, -+ -+ VKD3D_TESSELLATOR_DOMAIN_COUNT = 4, - }; - - #define VKD3DSI_NONE 0x0 -@@ -764,6 +811,7 @@ enum vkd3d_tessellator_domain - #define VKD3DSI_SAMPLE_INFO_UINT 0x1 - #define VKD3DSI_SAMPLER_COMPARISON_MODE 0x1 - #define VKD3DSI_SHIFT_UNMASKED 0x1 -+#define VKD3DSI_WAVE_PREFIX 0x1 - - #define VKD3DSI_PRECISE_X 0x100 - #define VKD3DSI_PRECISE_Y 0x200 -@@ -808,6 +856,8 @@ enum vkd3d_shader_type - VKD3D_SHADER_TYPE_COUNT, - }; - -+struct vkd3d_shader_message_context; -+ - struct vkd3d_shader_version - { - enum vkd3d_shader_type type; -@@ -1025,7 +1075,7 @@ struct signature_element *vsir_signature_find_element_for_reg(const struct shade - unsigned int reg_idx, unsigned int write_mask); - void shader_signature_cleanup(struct shader_signature *signature); - --struct vkd3d_shader_desc -+struct dxbc_shader_desc - { - const uint32_t *byte_code; - size_t byte_code_size; -@@ -1033,11 +1083,6 @@ struct vkd3d_shader_desc - struct shader_signature input_signature; - struct shader_signature output_signature; - struct shader_signature patch_constant_signature; -- -- struct -- { -- uint32_t used, external; -- } flat_constant_count[3]; - }; - - struct vkd3d_shader_register_semantic -@@ -1079,14 +1124,18 @@ struct vkd3d_shader_tgsm - struct vkd3d_shader_tgsm_raw - { - struct vkd3d_shader_dst_param reg; -+ unsigned int alignment; - unsigned int byte_count; -+ bool zero_init; - }; - - struct vkd3d_shader_tgsm_structured - { - struct vkd3d_shader_dst_param reg; -+ unsigned int alignment; - unsigned int byte_stride; - unsigned int structure_count; -+ bool zero_init; - }; - - struct vkd3d_shader_thread_group_size -@@ -1121,6 +1170,8 @@ enum vkd3d_primitive_type - VKD3D_PT_TRIANGLELIST_ADJ = 12, - VKD3D_PT_TRIANGLESTRIP_ADJ = 13, - VKD3D_PT_PATCH = 14, -+ -+ VKD3D_PT_COUNT = 15, - }; - - struct vkd3d_shader_primitive_type -@@ -1216,6 +1267,12 @@ static inline bool register_is_scalar_constant_zero(const struct vkd3d_shader_re - && (data_type_is_64_bit(reg->data_type) ? 
!reg->u.immconst_u64[0] : !reg->u.immconst_u32[0]); - } - -+static inline bool register_is_numeric_array(const struct vkd3d_shader_register *reg) -+{ -+ return (reg->type == VKD3DSPR_IMMCONSTBUFFER || reg->type == VKD3DSPR_IDXTEMP -+ || reg->type == VKD3DSPR_GROUPSHAREDMEM); -+} -+ - static inline bool vsir_register_is_label(const struct vkd3d_shader_register *reg) - { - return reg->type == VKD3DSPR_LABEL; -@@ -1268,6 +1325,8 @@ struct vkd3d_shader_instruction_array - struct vkd3d_shader_immediate_constant_buffer **icbs; - size_t icb_capacity; - size_t icb_count; -+ -+ struct vkd3d_shader_src_param *outpointid_param; - }; - - bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instructions, unsigned int reserve); -@@ -1278,6 +1337,8 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins - struct vkd3d_shader_immediate_constant_buffer *icb); - bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, - unsigned int dst, unsigned int src); -+struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( -+ struct vkd3d_shader_instruction_array *instructions); - void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); - - enum vkd3d_shader_config_flags -@@ -1290,7 +1351,12 @@ struct vsir_program - struct vkd3d_shader_version shader_version; - struct vkd3d_shader_instruction_array instructions; - -+ struct shader_signature input_signature; -+ struct shader_signature output_signature; -+ struct shader_signature patch_constant_signature; -+ - unsigned int input_control_point_count, output_control_point_count; -+ unsigned int flat_constant_count[3]; - unsigned int block_count; - unsigned int temp_count; - unsigned int ssa_count; -@@ -1300,8 +1366,15 @@ struct vsir_program - size_t block_name_count; - }; - --bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); - void vsir_program_cleanup(struct vsir_program *program); -+int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -+ struct vkd3d_shader_message_context *message_context); -+bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); -+enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); -+enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, -+ const char *source_name, struct vkd3d_shader_message_context *message_context); - - static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( - struct vsir_program *program, unsigned int count) -@@ -1319,32 +1392,21 @@ struct vkd3d_shader_parser - { - struct vkd3d_shader_message_context *message_context; - struct vkd3d_shader_location location; -+ struct vsir_program *program; - bool failed; -- -- struct vkd3d_shader_desc shader_desc; -- const struct vkd3d_shader_parser_ops *ops; -- struct vsir_program program; -- -- uint64_t config_flags; --}; -- --struct vkd3d_shader_parser_ops --{ -- void (*parser_destroy)(struct vkd3d_shader_parser *parser); - }; - - void vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) 
VKD3D_PRINTF_FUNC(3, 4); --bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, -- struct vkd3d_shader_message_context *message_context, const char *source_name, -- const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, -- unsigned int instruction_reserve); -+void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context, const char *source_name); - void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); - --static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parser) -+static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser, uint64_t config_flags) - { -- parser->ops->parser_destroy(parser); -+ return vsir_program_validate(parser->program, config_flags, -+ parser->location.source_name, parser->message_context); - } - - struct vkd3d_shader_descriptor_info1 -@@ -1385,21 +1447,22 @@ struct vkd3d_string_buffer_cache - size_t count, max_count, capacity; - }; - --enum vsir_asm_dialect -+enum vsir_asm_flags - { -- VSIR_ASM_VSIR, -- VSIR_ASM_D3D, -+ VSIR_ASM_FLAG_NONE = 0, -+ VSIR_ASM_FLAG_DUMP_TYPES = 0x1, - }; - --enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, -+enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, enum vsir_asm_dialect dialect); -+ struct vkd3d_shader_code *out, enum vsir_asm_flags flags); - void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); - struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); - void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); - void vkd3d_string_buffer_cache_cleanup(struct vkd3d_string_buffer_cache *list); - void vkd3d_string_buffer_cache_init(struct vkd3d_string_buffer_cache *list); - void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer); -+void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size); - int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f); - int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d); - int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) 
VKD3D_PRINTF_FUNC(2, 3); -@@ -1408,6 +1471,7 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *list, struct - vkd3d_string_buffer_trace_(buffer, __FUNCTION__) - void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function); - int vkd3d_string_buffer_vprintf(struct vkd3d_string_buffer *buffer, const char *format, va_list args); -+void vkd3d_shader_code_from_string_buffer(struct vkd3d_shader_code *code, struct vkd3d_string_buffer *buffer); - - struct vkd3d_bytecode_buffer - { -@@ -1472,35 +1536,32 @@ void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const s - enum vkd3d_shader_error error, const char *format, va_list args); - - void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info); -+uint64_t vkd3d_shader_init_config_flags(void); - void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function); - #define vkd3d_shader_trace_text(text, size) \ - vkd3d_shader_trace_text_(text, size, __FUNCTION__) - --int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); --int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); --int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); -+int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, -+ struct vkd3d_shader_message_context *message_context, struct vsir_program *program); -+int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, -+ struct vkd3d_shader_message_context *message_context, struct vsir_program *program); -+int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, -+ struct vkd3d_shader_message_context *message_context, struct vsir_program *program); - --void free_shader_desc(struct vkd3d_shader_desc *desc); -+void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); - - int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, -- struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); -+ struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc); - int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); - --struct vkd3d_glsl_generator; -- --struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, -- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); --int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, -- struct vsir_program *program, struct vkd3d_shader_code *out); --void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); -+int glsl_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -+ struct vkd3d_shader_message_context *message_context); - - #define SPIRV_MAX_SRC_COUNT 6 - --int spirv_compile(struct vkd3d_shader_parser *parser, -+int spirv_compile(struct vsir_program *program, uint64_t 
config_flags, - const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -@@ -1513,17 +1574,17 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, - int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - --enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser); -- - static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_type( - enum vkd3d_data_type data_type) - { - switch (data_type) - { -+ case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ - case VKD3D_DATA_FLOAT: - case VKD3D_DATA_UNORM: - case VKD3D_DATA_SNORM: - return VKD3D_SHADER_COMPONENT_FLOAT; -+ case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ - case VKD3D_DATA_UINT: - return VKD3D_SHADER_COMPONENT_UINT; - case VKD3D_DATA_INT: -@@ -1585,6 +1646,11 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resourc - } - } - -+static inline bool component_type_is_64_bit(enum vkd3d_shader_component_type component_type) -+{ -+ return component_type == VKD3D_SHADER_COMPONENT_DOUBLE || component_type == VKD3D_SHADER_COMPONENT_UINT64; -+} -+ - enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, - unsigned int index); - -@@ -1724,6 +1790,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ - return compacted_swizzle; - } - -+static inline uint32_t vsir_swizzle_from_writemask(unsigned int writemask) -+{ -+ static const unsigned int swizzles[16] = -+ { -+ 0, -+ VKD3D_SHADER_SWIZZLE(X, X, X, X), -+ VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), -+ VKD3D_SHADER_SWIZZLE(X, Y, X, X), -+ VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), -+ VKD3D_SHADER_SWIZZLE(X, Z, X, X), -+ VKD3D_SHADER_SWIZZLE(Y, Z, X, X), -+ VKD3D_SHADER_SWIZZLE(X, Y, Z, X), -+ VKD3D_SHADER_SWIZZLE(W, W, W, W), -+ VKD3D_SHADER_SWIZZLE(X, W, X, X), -+ VKD3D_SHADER_SWIZZLE(Y, W, X, X), -+ VKD3D_SHADER_SWIZZLE(X, Y, W, X), -+ VKD3D_SHADER_SWIZZLE(Z, W, X, X), -+ VKD3D_SHADER_SWIZZLE(X, Z, W, X), -+ VKD3D_SHADER_SWIZZLE(Y, Z, W, X), -+ VKD3D_SHADER_SWIZZLE(X, Y, Z, W), -+ }; -+ -+ return swizzles[writemask & 0xf]; -+} -+ - struct vkd3d_struct - { - enum vkd3d_shader_structure_type type; -@@ -1760,7 +1851,4 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void - void dxbc_writer_init(struct dxbc_writer *dxbc); - int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); - --enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_compile_info *compile_info); -- - #endif /* __VKD3D_SHADER_PRIVATE_H */ -diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c -new file mode 100644 -index 00000000000..56ba6990420 ---- /dev/null -+++ b/libs/vkd3d/libs/vkd3d/cache.c -@@ -0,0 +1,59 @@ -+/* -+ * Copyright 2024 Stefan Dösinger for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. 
-+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#include "vkd3d_private.h" -+ -+struct vkd3d_shader_cache -+{ -+ unsigned int refcount; -+}; -+ -+int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) -+{ -+ struct vkd3d_shader_cache *object; -+ -+ TRACE("%p.\n", cache); -+ -+ object = vkd3d_malloc(sizeof(*object)); -+ if (!object) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ object->refcount = 1; -+ *cache = object; -+ -+ return VKD3D_OK; -+} -+ -+unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache) -+{ -+ unsigned int refcount = vkd3d_atomic_increment_u32(&cache->refcount); -+ TRACE("cache %p refcount %u.\n", cache, refcount); -+ return refcount; -+} -+ -+unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) -+{ -+ unsigned int refcount = vkd3d_atomic_decrement_u32(&cache->refcount); -+ TRACE("cache %p refcount %u.\n", cache, refcount); -+ -+ if (refcount) -+ return refcount; -+ -+ vkd3d_free(cache); -+ return 0; -+} -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 7115a74a6f2..95366d3441b 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -2052,20 +2052,15 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, - * state when GPU finishes execution of a command list. 
*/ - if (is_swapchain_image) - { -- if (resource->present_state == D3D12_RESOURCE_STATE_PRESENT) -- { -- *access_mask = VK_ACCESS_MEMORY_READ_BIT; -- *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; -- if (image_layout) -- *image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; -- return true; -- } -- else if (resource->present_state != D3D12_RESOURCE_STATE_COMMON) -- { -- vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, -+ if (resource->present_state != D3D12_RESOURCE_STATE_PRESENT) -+ return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, - resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); -- return true; -- } -+ -+ *access_mask = VK_ACCESS_MEMORY_READ_BIT; -+ *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; -+ if (image_layout) -+ *image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; -+ return true; - } - - *access_mask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT; -@@ -5414,6 +5409,26 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 - | ((colour->uint32[2] & 0x3ff) << 22); - return vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); - -+ case DXGI_FORMAT_B5G6R5_UNORM: -+ colour->uint32[0] = (colour->uint32[2] & 0x1f) -+ | ((colour->uint32[1] & 0x3f) << 5) -+ | ((colour->uint32[0] & 0x1f) << 11); -+ return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); -+ -+ case DXGI_FORMAT_B5G5R5A1_UNORM: -+ colour->uint32[0] = (colour->uint32[2] & 0x1f) -+ | ((colour->uint32[1] & 0x1f) << 5) -+ | ((colour->uint32[0] & 0x1f) << 10) -+ | ((colour->uint32[3] & 0x1) << 15); -+ return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); -+ -+ case DXGI_FORMAT_B4G4R4A4_UNORM: -+ colour->uint32[0] = (colour->uint32[2] & 0xf) -+ | ((colour->uint32[1] & 0xf) << 4) -+ | ((colour->uint32[0] & 0xf) << 8) -+ | ((colour->uint32[3] & 0xf) << 12); -+ return vkd3d_get_format(device, DXGI_FORMAT_R16_UINT, false); -+ - default: - return NULL; - } -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 17c7ccb3e31..c8cfea43cc1 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -89,11 +89,13 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = - VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), - VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), - /* EXT extensions */ -+ VK_EXTENSION(EXT_4444_FORMATS, EXT_4444_formats), - VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), - VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), - VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), - VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), - VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), -+ VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), - VK_EXTENSION(EXT_MUTABLE_DESCRIPTOR_TYPE, EXT_mutable_descriptor_type), - VK_EXTENSION(EXT_ROBUSTNESS_2, EXT_robustness2), - VK_EXTENSION(EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, EXT_shader_demote_to_helper_invocation), -@@ -270,13 +272,15 @@ static bool has_extension(const VkExtensionProperties *extensions, - - for (i = 0; i < count; ++i) - { -- if (is_extension_disabled(extension_name)) -- { -- WARN("Extension %s is disabled.\n", debugstr_a(extension_name)); -- continue; -- } - if (!strcmp(extensions[i].extensionName, extension_name)) -+ { -+ if (is_extension_disabled(extension_name)) -+ { -+ WARN("Extension %s is 
disabled.\n", debugstr_a(extension_name)); -+ return false; -+ } - return true; -+ } - } - return false; - } -@@ -420,8 +424,6 @@ static HRESULT vkd3d_init_instance_caps(struct vkd3d_instance *instance, - ERR("Failed to enumerate instance extensions, vr %d.\n", vr); - return hresult_from_vk_result(vr); - } -- if (!count) -- return S_OK; - - if (!(vk_extensions = vkd3d_calloc(count, sizeof(*vk_extensions)))) - return E_OUTOFMEMORY; -@@ -557,12 +559,14 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, - const struct vkd3d_optional_instance_extensions_info *optional_extensions; - const struct vkd3d_application_info *vkd3d_application_info; - const struct vkd3d_host_time_domain_info *time_domain_info; -+ PFN_vkEnumerateInstanceVersion vkEnumerateInstanceVersion; - bool *user_extension_supported = NULL; - VkApplicationInfo application_info; - VkInstanceCreateInfo instance_info; - char application_name[PATH_MAX]; - uint32_t extension_count; - const char **extensions; -+ uint32_t vk_api_version; - VkInstance vk_instance; - VkResult vr; - HRESULT hr; -@@ -615,6 +619,16 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, - application_info.apiVersion = VK_API_VERSION_1_0; - instance->api_version = VKD3D_API_VERSION_1_0; - -+ /* vkEnumerateInstanceVersion was added in Vulkan 1.1, and its absence indicates only 1.0 is supported. */ -+ vkEnumerateInstanceVersion = (void *)vk_global_procs->vkGetInstanceProcAddr(NULL, "vkEnumerateInstanceVersion"); -+ if (vkEnumerateInstanceVersion && vkEnumerateInstanceVersion(&vk_api_version) >= 0 -+ && vk_api_version >= VK_API_VERSION_1_1) -+ { -+ TRACE("Vulkan API version 1.1 is available; requesting it.\n"); -+ application_info.apiVersion = VK_API_VERSION_1_1; -+ } -+ instance->vk_api_version = application_info.apiVersion; -+ - if ((vkd3d_application_info = vkd3d_find_struct(create_info->next, APPLICATION_INFO))) - { - if (vkd3d_application_info->application_name) -@@ -774,6 +788,11 @@ VkInstance vkd3d_instance_get_vk_instance(struct vkd3d_instance *instance) - return instance->vk_instance; - } - -+static bool d3d12_device_environment_is_vulkan_min_1_1(struct d3d12_device *device) -+{ -+ return device->environment == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1; -+} -+ - struct vkd3d_physical_device_info - { - /* properties */ -@@ -782,6 +801,7 @@ struct vkd3d_physical_device_info - VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; - VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties; - VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties; -+ VkPhysicalDeviceSubgroupProperties subgroup_properties; - - VkPhysicalDeviceProperties2KHR properties2; - -@@ -789,6 +809,7 @@ struct vkd3d_physical_device_info - VkPhysicalDeviceConditionalRenderingFeaturesEXT conditional_rendering_features; - VkPhysicalDeviceDepthClipEnableFeaturesEXT depth_clip_features; - VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features; -+ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_features; - VkPhysicalDeviceRobustness2FeaturesEXT robustness2_features; - VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote_features; - VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features; -@@ -796,6 +817,7 @@ struct vkd3d_physical_device_info - VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features; - VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features; - 
VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT mutable_features; -+ VkPhysicalDevice4444FormatsFeaturesEXT formats4444_features; - - VkPhysicalDeviceFeatures2 features2; - }; -@@ -808,6 +830,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; - VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; - VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; -+ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; - VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; - VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; - VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; -@@ -818,13 +841,16 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; - VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties; - VkPhysicalDevice physical_device = device->vk_physical_device; -+ VkPhysicalDevice4444FormatsFeaturesEXT *formats4444_features; - VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; - struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; -+ VkPhysicalDeviceSubgroupProperties *subgroup_properties; - - memset(info, 0, sizeof(*info)); - conditional_rendering_features = &info->conditional_rendering_features; - depth_clip_features = &info->depth_clip_features; - descriptor_indexing_features = &info->descriptor_indexing_features; -+ fragment_shader_interlock_features = &info->fragment_shader_interlock_features; - robustness2_features = &info->robustness2_features; - descriptor_indexing_properties = &info->descriptor_indexing_properties; - maintenance3_properties = &info->maintenance3_properties; -@@ -835,31 +861,49 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - vertex_divisor_properties = &info->vertex_divisor_properties; - timeline_semaphore_features = &info->timeline_semaphore_features; - mutable_features = &info->mutable_features; -+ formats4444_features = &info->formats4444_features; - xfb_features = &info->xfb_features; - xfb_properties = &info->xfb_properties; -+ subgroup_properties = &info->subgroup_properties; - - info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - - conditional_rendering_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; -- vk_prepend_struct(&info->features2, conditional_rendering_features); -+ if (vulkan_info->EXT_conditional_rendering) -+ vk_prepend_struct(&info->features2, conditional_rendering_features); - depth_clip_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; -- vk_prepend_struct(&info->features2, depth_clip_features); -+ if (vulkan_info->EXT_depth_clip_enable) -+ vk_prepend_struct(&info->features2, depth_clip_features); - descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; -- vk_prepend_struct(&info->features2, descriptor_indexing_features); -+ if (vulkan_info->EXT_descriptor_indexing) -+ vk_prepend_struct(&info->features2, descriptor_indexing_features); -+ fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; -+ if (vulkan_info->EXT_fragment_shader_interlock) -+ vk_prepend_struct(&info->features2, fragment_shader_interlock_features); - 
robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; -- vk_prepend_struct(&info->features2, robustness2_features); -+ if (vulkan_info->EXT_robustness2) -+ vk_prepend_struct(&info->features2, robustness2_features); - demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; -- vk_prepend_struct(&info->features2, demote_features); -+ if (vulkan_info->EXT_shader_demote_to_helper_invocation) -+ vk_prepend_struct(&info->features2, demote_features); - buffer_alignment_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; -- vk_prepend_struct(&info->features2, buffer_alignment_features); -+ if (vulkan_info->EXT_texel_buffer_alignment) -+ vk_prepend_struct(&info->features2, buffer_alignment_features); - xfb_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; -- vk_prepend_struct(&info->features2, xfb_features); -+ if (vulkan_info->EXT_transform_feedback) -+ vk_prepend_struct(&info->features2, xfb_features); - vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; -- vk_prepend_struct(&info->features2, vertex_divisor_features); -+ if (vulkan_info->EXT_vertex_attribute_divisor) -+ vk_prepend_struct(&info->features2, vertex_divisor_features); - timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; -- vk_prepend_struct(&info->features2, timeline_semaphore_features); -+ if (vulkan_info->KHR_timeline_semaphore) -+ vk_prepend_struct(&info->features2, timeline_semaphore_features); - mutable_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; -- vk_prepend_struct(&info->features2, mutable_features); -+ if (vulkan_info->EXT_mutable_descriptor_type) -+ vk_prepend_struct(&info->features2, mutable_features); -+ formats4444_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; -+ if (vulkan_info->EXT_4444_formats) -+ vk_prepend_struct(&info->features2, formats4444_features); - - if (vulkan_info->KHR_get_physical_device_properties2) - VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); -@@ -869,15 +913,23 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - - maintenance3_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; -- vk_prepend_struct(&info->properties2, maintenance3_properties); -+ if (vulkan_info->KHR_maintenance3) -+ vk_prepend_struct(&info->properties2, maintenance3_properties); - descriptor_indexing_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; -- vk_prepend_struct(&info->properties2, descriptor_indexing_properties); -+ if (vulkan_info->EXT_descriptor_indexing) -+ vk_prepend_struct(&info->properties2, descriptor_indexing_properties); - buffer_alignment_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; -- vk_prepend_struct(&info->properties2, buffer_alignment_properties); -+ if (vulkan_info->EXT_texel_buffer_alignment) -+ vk_prepend_struct(&info->properties2, buffer_alignment_properties); - xfb_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; -- vk_prepend_struct(&info->properties2, xfb_properties); -+ if (vulkan_info->EXT_transform_feedback) -+ vk_prepend_struct(&info->properties2, xfb_properties); - 
vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; -- vk_prepend_struct(&info->properties2, vertex_divisor_properties); -+ if (vulkan_info->EXT_vertex_attribute_divisor) -+ vk_prepend_struct(&info->properties2, vertex_divisor_properties); -+ subgroup_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; -+ if (d3d12_device_environment_is_vulkan_min_1_1(device)) -+ vk_prepend_struct(&info->properties2, subgroup_properties); - - if (vulkan_info->KHR_get_physical_device_properties2) - VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); -@@ -1158,6 +1210,7 @@ static void vkd3d_trace_physical_device_limits(const struct vkd3d_physical_devic - - static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_device_info *info) - { -+ const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; - const VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; - const VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; - const VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; -@@ -1279,6 +1332,15 @@ static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_dev - TRACE(" VkPhysicalDeviceDepthClipEnableFeaturesEXT:\n"); - TRACE(" depthClipEnable: %#x.\n", depth_clip_features->depthClipEnable); - -+ fragment_shader_interlock_features = &info->fragment_shader_interlock_features; -+ TRACE(" VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT:\n"); -+ TRACE(" fragmentShaderSampleInterlock: %#x.\n.", -+ fragment_shader_interlock_features->fragmentShaderSampleInterlock); -+ TRACE(" fragmentShaderPixelInterlock: %#x\n.", -+ fragment_shader_interlock_features->fragmentShaderPixelInterlock); -+ TRACE(" fragmentShaderShadingRateInterlock: %#x\n.", -+ fragment_shader_interlock_features->fragmentShaderShadingRateInterlock); -+ - demote_features = &info->demote_features; - TRACE(" VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT:\n"); - TRACE(" shaderDemoteToHelperInvocation: %#x.\n", demote_features->shaderDemoteToHelperInvocation); -@@ -1470,22 +1532,92 @@ static bool d3d12_device_supports_typed_uav_load_additional_formats(const struct - return true; - } - -+static HRESULT vkd3d_check_device_extensions(struct d3d12_device *device, -+ const struct vkd3d_device_create_info *create_info, VkExtensionProperties **vk_extensions, -+ uint32_t *vk_extension_count, uint32_t *device_extension_count, bool **user_extension_supported) -+{ -+ const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; -+ const struct vkd3d_optional_device_extensions_info *optional_extensions; -+ VkPhysicalDevice physical_device = device->vk_physical_device; -+ struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; -+ VkResult vr; -+ -+ *device_extension_count = 0; -+ -+ if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, vk_extension_count, NULL))) < 0) -+ { -+ ERR("Failed to enumerate device extensions, vr %d.\n", vr); -+ return hresult_from_vk_result(vr); -+ } -+ -+ if (!(*vk_extensions = vkd3d_calloc(*vk_extension_count, sizeof(**vk_extensions)))) -+ return E_OUTOFMEMORY; -+ -+ TRACE("Enumerating %u device extensions.\n", *vk_extension_count); -+ if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, vk_extension_count, *vk_extensions))) < 0) -+ { -+ ERR("Failed to enumerate device extensions, vr %d.\n", vr); -+ 
vkd3d_free(*vk_extensions); -+ return hresult_from_vk_result(vr); -+ } -+ -+ optional_extensions = vkd3d_find_struct(create_info->next, OPTIONAL_DEVICE_EXTENSIONS_INFO); -+ if (optional_extensions && optional_extensions->extension_count) -+ { -+ if (!(*user_extension_supported = vkd3d_calloc(optional_extensions->extension_count, sizeof(bool)))) -+ { -+ vkd3d_free(*vk_extensions); -+ return E_OUTOFMEMORY; -+ } -+ } -+ else -+ { -+ *user_extension_supported = NULL; -+ } -+ -+ *device_extension_count = vkd3d_check_extensions(*vk_extensions, *vk_extension_count, -+ required_device_extensions, ARRAY_SIZE(required_device_extensions), -+ optional_device_extensions, ARRAY_SIZE(optional_device_extensions), -+ create_info->device_extensions, create_info->device_extension_count, -+ optional_extensions ? optional_extensions->extensions : NULL, -+ optional_extensions ? optional_extensions->extension_count : 0, -+ *user_extension_supported, vulkan_info, "device", -+ device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); -+ -+ return S_OK; -+} -+ - static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - const struct vkd3d_device_create_info *create_info, - struct vkd3d_physical_device_info *physical_device_info, - uint32_t *device_extension_count, bool **user_extension_supported) - { -+ const VkPhysicalDeviceSubgroupProperties *subgroup_properties = &physical_device_info->subgroup_properties; - const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; -- const struct vkd3d_optional_device_extensions_info *optional_extensions; -+ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock; - VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; - VkPhysicalDevice physical_device = device->vk_physical_device; - struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; - VkExtensionProperties *vk_extensions; - VkPhysicalDeviceFeatures *features; -- uint32_t count; -- VkResult vr; -+ uint32_t vk_extension_count; -+ HRESULT hr; - -- *device_extension_count = 0; -+ /* SHUFFLE is required to implement WaveReadLaneAt with dynamically uniform index before SPIR-V 1.5 / Vulkan 1.2. */ -+ static const VkSubgroupFeatureFlags required_subgroup_features = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT -+ | VK_SUBGROUP_FEATURE_BASIC_BIT -+ | VK_SUBGROUP_FEATURE_BALLOT_BIT -+ | VK_SUBGROUP_FEATURE_SHUFFLE_BIT -+ | VK_SUBGROUP_FEATURE_QUAD_BIT -+ | VK_SUBGROUP_FEATURE_VOTE_BIT; -+ -+ static const VkSubgroupFeatureFlags required_stages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; -+ -+ if (FAILED(hr = vkd3d_check_device_extensions(device, create_info, &vk_extensions, &vk_extension_count, -+ device_extension_count, user_extension_supported))) -+ return hr; -+ -+ vkd3d_physical_device_info_init(physical_device_info, device); - - vkd3d_trace_physical_device(physical_device, physical_device_info, vk_procs); - vkd3d_trace_physical_device_features(physical_device_info); -@@ -1539,8 +1671,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - - device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageReadWithoutFormat - && d3d12_device_supports_typed_uav_load_additional_formats(device); -- /* GL_INTEL_fragment_shader_ordering, no Vulkan equivalent. */ -- device->feature_options.ROVsSupported = FALSE; - /* GL_INTEL_conservative_rasterization, no Vulkan equivalent. 
*/ - device->feature_options.ConservativeRasterizationTier = D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED; - device->feature_options.MaxGPUVirtualAddressBitsPerResource = 40; /* FIXME */ -@@ -1550,10 +1680,12 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - device->feature_options.ResourceHeapTier = D3D12_RESOURCE_HEAP_TIER_2; - - /* Shader Model 6 support. */ -- device->feature_options1.WaveOps = FALSE; -- device->feature_options1.WaveLaneCountMin = 0; -- device->feature_options1.WaveLaneCountMax = 0; -- device->feature_options1.TotalLaneCount = 0; -+ device->feature_options1.WaveOps = subgroup_properties->subgroupSize >= 4 -+ && (subgroup_properties->supportedOperations & required_subgroup_features) == required_subgroup_features -+ && (subgroup_properties->supportedStages & required_stages) == required_stages; -+ device->feature_options1.WaveLaneCountMin = subgroup_properties->subgroupSize; -+ device->feature_options1.WaveLaneCountMax = subgroup_properties->subgroupSize; -+ device->feature_options1.TotalLaneCount = 32 * subgroup_properties->subgroupSize; /* approx. */ - device->feature_options1.ExpandedComputeResourceStates = TRUE; - device->feature_options1.Int64ShaderOps = features->shaderInt64; - -@@ -1577,47 +1709,11 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - device->feature_options5.RenderPassesTier = D3D12_RENDER_PASS_TIER_0; - device->feature_options5.RaytracingTier = D3D12_RAYTRACING_TIER_NOT_SUPPORTED; - -- if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, NULL))) < 0) -- { -- ERR("Failed to enumerate device extensions, vr %d.\n", vr); -- return hresult_from_vk_result(vr); -- } -- if (!count) -- return S_OK; -- -- if (!(vk_extensions = vkd3d_calloc(count, sizeof(*vk_extensions)))) -- return E_OUTOFMEMORY; -- -- TRACE("Enumerating %u device extensions.\n", count); -- if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, vk_extensions))) < 0) -- { -- ERR("Failed to enumerate device extensions, vr %d.\n", vr); -- vkd3d_free(vk_extensions); -- return hresult_from_vk_result(vr); -- } -- -- optional_extensions = vkd3d_find_struct(create_info->next, OPTIONAL_DEVICE_EXTENSIONS_INFO); -- if (optional_extensions && optional_extensions->extension_count) -- { -- if (!(*user_extension_supported = vkd3d_calloc(optional_extensions->extension_count, sizeof(bool)))) -- { -- vkd3d_free(vk_extensions); -- return E_OUTOFMEMORY; -- } -- } -- else -- { -- *user_extension_supported = NULL; -- } -- -- *device_extension_count = vkd3d_check_extensions(vk_extensions, count, -- required_device_extensions, ARRAY_SIZE(required_device_extensions), -- optional_device_extensions, ARRAY_SIZE(optional_device_extensions), -- create_info->device_extensions, create_info->device_extension_count, -- optional_extensions ? optional_extensions->extensions : NULL, -- optional_extensions ? 
optional_extensions->extension_count : 0, -- *user_extension_supported, vulkan_info, "device", -- device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); -+ fragment_shader_interlock = &physical_device_info->fragment_shader_interlock_features; -+ if (!fragment_shader_interlock->fragmentShaderSampleInterlock -+ || !fragment_shader_interlock->fragmentShaderPixelInterlock) -+ vulkan_info->EXT_fragment_shader_interlock = false; -+ device->feature_options.ROVsSupported = vulkan_info->EXT_fragment_shader_interlock; - - if (!physical_device_info->conditional_rendering_features.conditionalRendering) - vulkan_info->EXT_conditional_rendering = false; -@@ -1634,9 +1730,11 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - if (!physical_device_info->timeline_semaphore_features.timelineSemaphore) - vulkan_info->KHR_timeline_semaphore = false; - -+ physical_device_info->formats4444_features.formatA4B4G4R4 = VK_FALSE; -+ - vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties; - -- if (get_spec_version(vk_extensions, count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3) -+ if (get_spec_version(vk_extensions, vk_extension_count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3) - { - const VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *divisor_features; - divisor_features = &physical_device_info->vertex_divisor_features; -@@ -1675,6 +1773,10 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] - = VKD3D_SHADER_SPIRV_EXTENSION_EXT_DESCRIPTOR_INDEXING; - -+ if (vulkan_info->EXT_fragment_shader_interlock) -+ vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] -+ = VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK; -+ - if (vulkan_info->EXT_shader_stencil_export) - vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] - = VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT; -@@ -2029,8 +2131,6 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, - - VK_CALL(vkGetPhysicalDeviceMemoryProperties(physical_device, &device->memory_properties)); - -- vkd3d_physical_device_info_init(&physical_device_info, device); -- - if (FAILED(hr = vkd3d_init_device_caps(device, create_info, &physical_device_info, - &extension_count, &user_extension_supported))) - return hr; -@@ -2498,18 +2598,286 @@ static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cach - } - } - -+/* ID3D12ShaderCacheSession */ -+struct d3d12_cache_session -+{ -+ ID3D12ShaderCacheSession ID3D12ShaderCacheSession_iface; -+ unsigned int refcount; -+ -+ struct list cache_list_entry; -+ -+ struct d3d12_device *device; -+ struct vkd3d_private_store private_store; -+ D3D12_SHADER_CACHE_SESSION_DESC desc; -+ struct vkd3d_shader_cache *cache; -+}; -+ -+static struct vkd3d_mutex cache_list_mutex = VKD3D_MUTEX_INITIALIZER; -+static struct list cache_list = LIST_INIT(cache_list); -+ -+static inline struct d3d12_cache_session *impl_from_ID3D12ShaderCacheSession(ID3D12ShaderCacheSession *iface) -+{ -+ return CONTAINING_RECORD(iface, struct d3d12_cache_session, ID3D12ShaderCacheSession_iface); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_QueryInterface(ID3D12ShaderCacheSession *iface, -+ REFIID iid, void **object) -+{ -+ TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); -+ -+ if (!object) -+ { -+ WARN("Output pointer is NULL, returning E_POINTER.\n"); -+ return 
E_POINTER; -+ } -+ -+ if (IsEqualGUID(iid, &IID_ID3D12ShaderCacheSession) -+ || IsEqualGUID(iid, &IID_ID3D12DeviceChild) -+ || IsEqualGUID(iid, &IID_ID3D12Object) -+ || IsEqualGUID(iid, &IID_IUnknown)) -+ { -+ ID3D12ShaderCacheSession_AddRef(iface); -+ *object = iface; -+ return S_OK; -+ } -+ -+ WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid)); -+ -+ *object = NULL; -+ return E_NOINTERFACE; -+} -+ -+static ULONG STDMETHODCALLTYPE d3d12_cache_session_AddRef(ID3D12ShaderCacheSession *iface) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ unsigned int refcount = vkd3d_atomic_increment_u32(&session->refcount); -+ -+ TRACE("%p increasing refcount to %u.\n", session, refcount); -+ -+ return refcount; -+} -+ -+static void d3d12_cache_session_destroy(struct d3d12_cache_session *session) -+{ -+ struct d3d12_device *device = session->device; -+ -+ TRACE("Destroying cache session %p.\n", session); -+ -+ vkd3d_mutex_lock(&cache_list_mutex); -+ list_remove(&session->cache_list_entry); -+ vkd3d_mutex_unlock(&cache_list_mutex); -+ -+ vkd3d_shader_cache_decref(session->cache); -+ vkd3d_private_store_destroy(&session->private_store); -+ vkd3d_free(session); -+ -+ d3d12_device_release(device); -+} -+ -+static ULONG STDMETHODCALLTYPE d3d12_cache_session_Release(ID3D12ShaderCacheSession *iface) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ unsigned int refcount = vkd3d_atomic_decrement_u32(&session->refcount); -+ -+ TRACE("%p decreasing refcount to %u.\n", session, refcount); -+ -+ if (!refcount) -+ d3d12_cache_session_destroy(session); -+ -+ return refcount; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_GetPrivateData(ID3D12ShaderCacheSession *iface, -+ REFGUID guid, UINT *data_size, void *data) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); -+ -+ return vkd3d_get_private_data(&session->private_store, guid, data_size, data); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetPrivateData(ID3D12ShaderCacheSession *iface, -+ REFGUID guid, UINT data_size, const void *data) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); -+ -+ return vkd3d_set_private_data(&session->private_store, guid, data_size, data); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetPrivateDataInterface( -+ ID3D12ShaderCacheSession *iface, REFGUID guid, const IUnknown *data) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); -+ -+ return vkd3d_set_private_data_interface(&session->private_store, guid, data); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_SetName(ID3D12ShaderCacheSession *iface, -+ const WCHAR *name) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p, name %s.\n", iface, debugstr_w(name, session->device->wchar_size)); -+ -+ return name ? 
S_OK : E_INVALIDARG; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_GetDevice(ID3D12ShaderCacheSession *iface, -+ REFIID iid, void **device) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); -+ -+ return d3d12_device_query_interface(session->device, iid, device); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_FindValue(ID3D12ShaderCacheSession *iface, -+ const void *key, UINT key_size, void *value, UINT *value_size) -+{ -+ FIXME("iface %p, key %p, key_size %#x, value %p, value_size %p stub!\n", -+ iface, key, key_size, value, value_size); -+ -+ return DXGI_ERROR_NOT_FOUND; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_cache_session_StoreValue(ID3D12ShaderCacheSession *iface, -+ const void *key, UINT key_size, const void *value, UINT value_size) -+{ -+ FIXME("iface %p, key %p, key_size %#x, value %p, value_size %u stub!\n", iface, key, key_size, -+ value, value_size); -+ -+ return E_NOTIMPL; -+} -+ -+static void STDMETHODCALLTYPE d3d12_cache_session_SetDeleteOnDestroy(ID3D12ShaderCacheSession *iface) -+{ -+ FIXME("iface %p stub!\n", iface); -+} -+ -+static D3D12_SHADER_CACHE_SESSION_DESC * STDMETHODCALLTYPE d3d12_cache_session_GetDesc( -+ ID3D12ShaderCacheSession *iface, D3D12_SHADER_CACHE_SESSION_DESC *desc) -+{ -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ -+ TRACE("iface %p.\n", iface); -+ *desc = session->desc; -+ return desc; -+} -+ -+static const struct ID3D12ShaderCacheSessionVtbl d3d12_cache_session_vtbl = -+{ -+ /* IUnknown methods */ -+ d3d12_cache_session_QueryInterface, -+ d3d12_cache_session_AddRef, -+ d3d12_cache_session_Release, -+ /* ID3D12Object methods */ -+ d3d12_cache_session_GetPrivateData, -+ d3d12_cache_session_SetPrivateData, -+ d3d12_cache_session_SetPrivateDataInterface, -+ d3d12_cache_session_SetName, -+ /* ID3D12DeviceChild methods */ -+ d3d12_cache_session_GetDevice, -+ /* ID3D12ShaderCacheSession methods */ -+ d3d12_cache_session_FindValue, -+ d3d12_cache_session_StoreValue, -+ d3d12_cache_session_SetDeleteOnDestroy, -+ d3d12_cache_session_GetDesc, -+}; -+ -+static HRESULT d3d12_cache_session_init(struct d3d12_cache_session *session, -+ struct d3d12_device *device, const D3D12_SHADER_CACHE_SESSION_DESC *desc) -+{ -+ struct d3d12_cache_session *i; -+ enum vkd3d_result ret; -+ HRESULT hr; -+ -+ session->ID3D12ShaderCacheSession_iface.lpVtbl = &d3d12_cache_session_vtbl; -+ session->refcount = 1; -+ session->desc = *desc; -+ session->cache = NULL; -+ -+ if (!session->desc.MaximumValueFileSizeBytes) -+ session->desc.MaximumValueFileSizeBytes = 128 * 1024 * 1024; -+ if (!session->desc.MaximumInMemoryCacheSizeBytes) -+ session->desc.MaximumInMemoryCacheSizeBytes = 1024 * 1024; -+ if (!session->desc.MaximumInMemoryCacheEntries) -+ session->desc.MaximumInMemoryCacheEntries = 128; -+ -+ if (FAILED(hr = vkd3d_private_store_init(&session->private_store))) -+ return hr; -+ -+ vkd3d_mutex_lock(&cache_list_mutex); -+ -+ /* We expect the number of open caches to be small. 
*/ -+ LIST_FOR_EACH_ENTRY(i, &cache_list, struct d3d12_cache_session, cache_list_entry) -+ { -+ if (!memcmp(&i->desc.Identifier, &desc->Identifier, sizeof(desc->Identifier))) -+ { -+ TRACE("Found an existing cache %p from session %p.\n", i->cache, i); -+ if (desc->Version == i->desc.Version) -+ { -+ session->desc = i->desc; -+ vkd3d_shader_cache_incref(session->cache = i->cache); -+ break; -+ } -+ else -+ { -+ WARN("version mismatch: Existing %"PRIu64" new %"PRIu64".\n", -+ i->desc.Version, desc->Version); -+ hr = DXGI_ERROR_ALREADY_EXISTS; -+ goto error; -+ } -+ } -+ } -+ -+ if (!session->cache) -+ { -+ if (session->desc.Mode == D3D12_SHADER_CACHE_MODE_DISK) -+ FIXME("Disk caches are not yet implemented.\n"); -+ -+ ret = vkd3d_shader_open_cache(&session->cache); -+ if (ret) -+ { -+ WARN("Failed to open shader cache.\n"); -+ hr = hresult_from_vkd3d_result(ret); -+ goto error; -+ } -+ } -+ -+ /* Add it to the list even if we reused an existing cache. The other session might be destroyed, -+ * but the cache stays alive and can be opened a third time. */ -+ list_add_tail(&cache_list, &session->cache_list_entry); -+ d3d12_device_add_ref(session->device = device); -+ -+ vkd3d_mutex_unlock(&cache_list_mutex); -+ return S_OK; -+ -+error: -+ vkd3d_private_store_destroy(&session->private_store); -+ vkd3d_mutex_unlock(&cache_list_mutex); -+ return hr; -+} -+ - /* ID3D12Device */ --static inline struct d3d12_device *impl_from_ID3D12Device7(ID3D12Device7 *iface) -+static inline struct d3d12_device *impl_from_ID3D12Device9(ID3D12Device9 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device7_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device9_iface); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device9 *iface, - REFIID riid, void **object) - { - TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); - -- if (IsEqualGUID(riid, &IID_ID3D12Device7) -+ if (IsEqualGUID(riid, &IID_ID3D12Device9) -+ || IsEqualGUID(riid, &IID_ID3D12Device8) -+ || IsEqualGUID(riid, &IID_ID3D12Device7) - || IsEqualGUID(riid, &IID_ID3D12Device6) - || IsEqualGUID(riid, &IID_ID3D12Device5) - || IsEqualGUID(riid, &IID_ID3D12Device4) -@@ -2531,9 +2899,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *ifac - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device7 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device9 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - unsigned int refcount = vkd3d_atomic_increment_u32(&device->refcount); - - TRACE("%p increasing refcount to %u.\n", device, refcount); -@@ -2563,9 +2931,9 @@ static HRESULT device_worker_stop(struct d3d12_device *device) - return S_OK; - } - --static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device9 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - unsigned int refcount = vkd3d_atomic_decrement_u32(&device->refcount); - - TRACE("%p decreasing refcount to %u.\n", device, refcount); -@@ -2602,10 +2970,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) - return refcount; - } - --static HRESULT 
STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device9 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -2613,10 +2981,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *ifac - return vkd3d_get_private_data(&device->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device9 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -2624,19 +2992,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *ifac - return vkd3d_set_private_data(&device->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device9 *iface, - REFGUID guid, const IUnknown *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&device->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device9 *iface, const WCHAR *name) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size)); - -@@ -2644,17 +3012,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, cons - VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); - } - --static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device7 *iface) -+static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device9 *iface) - { - TRACE("iface %p.\n", iface); - - return 1; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device9 *iface, - const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_command_queue *object; - HRESULT hr; - -@@ -2668,10 +3036,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 * - riid, command_queue); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device9 *iface, - D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = 
impl_from_ID3D12Device9(iface); - struct d3d12_command_allocator *object; - HRESULT hr; - -@@ -2685,10 +3053,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic - riid, command_allocator); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device9 *iface, - const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_pipeline_state *object; - HRESULT hr; - -@@ -2702,10 +3070,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 - &IID_ID3D12PipelineState, riid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device9 *iface, - const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_pipeline_state *object; - HRESULT hr; - -@@ -2719,11 +3087,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D - &IID_ID3D12PipelineState, riid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device9 *iface, - UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, - ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_command_list *object; - HRESULT hr; - -@@ -2846,10 +3214,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) - return true; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 *iface, - D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", - iface, feature, feature_data, feature_data_size); -@@ -3095,9 +3463,21 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 - return E_INVALIDARG; - } - -+ if (data->HighestShaderModel != D3D_SHADER_MODEL_5_1 -+ && (data->HighestShaderModel < D3D_SHADER_MODEL_6_0 -+ || data->HighestShaderModel > D3D_HIGHEST_SHADER_MODEL)) -+ { -+ WARN("Unknown shader model %#x.\n", data->HighestShaderModel); -+ return E_INVALIDARG; -+ } -+ - TRACE("Request shader model %#x.\n", data->HighestShaderModel); - -- data->HighestShaderModel = D3D_SHADER_MODEL_5_1; -+#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -+ data->HighestShaderModel = min(data->HighestShaderModel, D3D_SHADER_MODEL_6_0); -+#else -+ data->HighestShaderModel = min(data->HighestShaderModel, D3D_SHADER_MODEL_5_1); -+#endif - - TRACE("Shader model %#x.\n", data->HighestShaderModel); - return S_OK; -@@ -3515,16 +3895,101 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_CheckFeatureSupport(ID3D12Device7 - return S_OK; - } - -+ case D3D12_FEATURE_D3D12_OPTIONS14: -+ { -+ D3D12_FEATURE_DATA_D3D12_OPTIONS14 *data = feature_data; -+ -+ if (feature_data_size != sizeof(*data)) -+ { -+ WARN("Invalid size %u.\n", feature_data_size); -+ } -+ -+ data->AdvancedTextureOpsSupported = FALSE; -+ data->WriteableMSAATexturesSupported = FALSE; -+ data->IndependentFrontAndBackStencilRefMaskSupported = FALSE; -+ -+ TRACE("Advanced texture ops %#x.\n", data->AdvancedTextureOpsSupported); -+ TRACE("Writeable MSAA textures %#x.\n", data->WriteableMSAATexturesSupported); -+ TRACE("Independent front and back stencil ref mask %#x.\n", data->IndependentFrontAndBackStencilRefMaskSupported); -+ return S_OK; -+ } -+ -+ case D3D12_FEATURE_D3D12_OPTIONS15: -+ { -+ D3D12_FEATURE_DATA_D3D12_OPTIONS15 *data = feature_data; -+ -+ if (feature_data_size != sizeof(*data)) -+ { -+ WARN("Invalid size %u.\n", feature_data_size); -+ } -+ -+ data->TriangleFanSupported = FALSE; -+ data->DynamicIndexBufferStripCutSupported = FALSE; -+ -+ TRACE("Triangle fan %#x.\n", data->TriangleFanSupported); -+ TRACE("Dynamic index buffer strip cut %#x.\n", data->DynamicIndexBufferStripCutSupported); -+ return S_OK; -+ } -+ -+ case D3D12_FEATURE_D3D12_OPTIONS16: -+ { -+ D3D12_FEATURE_DATA_D3D12_OPTIONS16 *data = feature_data; -+ -+ if (feature_data_size != sizeof(*data)) -+ { -+ WARN("Invalid size %u.\n", feature_data_size); -+ } -+ -+ data->DynamicDepthBiasSupported = FALSE; -+ data->GPUUploadHeapSupported = FALSE; -+ -+ TRACE("Dynamic depth bias %#x.\n", data->DynamicDepthBiasSupported); -+ TRACE("GPU upload heap %#x.\n", data->GPUUploadHeapSupported); -+ return S_OK; -+ } -+ -+ case D3D12_FEATURE_D3D12_OPTIONS17: -+ { -+ D3D12_FEATURE_DATA_D3D12_OPTIONS17 *data = feature_data; -+ -+ if (feature_data_size != sizeof(*data)) -+ { -+ WARN("Invalid size %u.\n", feature_data_size); -+ } -+ -+ data->NonNormalizedCoordinateSamplersSupported = FALSE; -+ data->ManualWriteTrackingResourceSupported = FALSE; -+ -+ TRACE("Non-normalized coordinate samplers %#x.\n", data->NonNormalizedCoordinateSamplersSupported); -+ TRACE("Manual write tracking resource %#x.\n", data->ManualWriteTrackingResourceSupported); -+ return S_OK; -+ } -+ -+ case D3D12_FEATURE_D3D12_OPTIONS18: -+ { -+ D3D12_FEATURE_DATA_D3D12_OPTIONS18 *data = feature_data; -+ -+ if (feature_data_size != sizeof(*data)) -+ { -+ WARN("Invalid size %u.\n", feature_data_size); -+ } -+ -+ data->RenderPassesValid = FALSE; -+ -+ TRACE("Render passes valid %#x.\n", data->RenderPassesValid); -+ return S_OK; -+ } -+ - default: - FIXME("Unhandled feature %#x.\n", feature); - return E_NOTIMPL; - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device9 *iface, - const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_descriptor_heap *object; - HRESULT hr; - -@@ -3538,7 +4003,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device7 - &IID_ID3D12DescriptorHeap, riid, descriptor_heap); - } - --static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device7 *iface, -+static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device9 *iface, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) - { - 
TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); -@@ -3561,11 +4026,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device9 *iface, - UINT node_mask, const void *bytecode, SIZE_T bytecode_length, - REFIID riid, void **root_signature) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_root_signature *object; - HRESULT hr; - -@@ -3581,10 +4046,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 - &IID_ID3D12RootSignature, riid, root_signature); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device9 *iface, - const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); -@@ -3593,11 +4058,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device9 *iface, - ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, resource %p, desc %p, descriptor %s.\n", -@@ -3607,11 +4072,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device9 *iface, - ID3D12Resource *resource, ID3D12Resource *counter_resource, - const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %s.\n", -@@ -3622,7 +4087,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device9 *iface, - ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -@@ -3630,10 +4095,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 - iface, resource, desc, debug_cpu_handle(descriptor)); - - 
d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), -- impl_from_ID3D12Device7(iface), unsafe_impl_from_ID3D12Resource(resource), desc); -+ impl_from_ID3D12Device9(iface), unsafe_impl_from_ID3D12Resource(resource), desc); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device9 *iface, - ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -@@ -3641,13 +4106,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 - iface, resource, desc, debug_cpu_handle(descriptor)); - - d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), -- impl_from_ID3D12Device7(iface), unsafe_impl_from_ID3D12Resource(resource), desc); -+ impl_from_ID3D12Device9(iface), unsafe_impl_from_ID3D12Resource(resource), desc); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device9 *iface, - const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); -@@ -3656,14 +4121,14 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device9 *iface, - UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, - const UINT *dst_descriptor_range_sizes, - UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, - const UINT *src_descriptor_range_sizes, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; - unsigned int dst_range_size, src_range_size; - struct d3d12_descriptor_heap *dst_heap; -@@ -3719,7 +4184,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device7 *iface, - } - } - --static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device9 *iface, - UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, - const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) -@@ -3850,10 +4315,10 @@ static void d3d12_device_get_resource_allocation_info(struct d3d12_device *devic - } - - static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( -- ID3D12Device7 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, -+ ID3D12Device9 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, - UINT count, const D3D12_RESOURCE_DESC *resource_descs) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, 
resource_descs %p.\n", - iface, info, visible_mask, count, resource_descs); -@@ -3865,10 +4330,10 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour - return info; - } - --static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device7 *iface, -+static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device9 *iface, - D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - bool coherent; - - TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", -@@ -3908,12 +4373,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope - return heap_properties; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device9 *iface, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_resource *object; - HRESULT hr; -@@ -3935,10 +4400,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device9 *iface, - const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_heap *object; - HRESULT hr; - -@@ -3954,12 +4419,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, - return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device9 *iface, - ID3D12Heap *heap, UINT64 heap_offset, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_heap *heap_object; - struct d3d12_resource *object; -@@ -3980,11 +4445,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device7 - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device9 *iface, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct 
d3d12_device *device = impl_from_ID3D12Device9(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_resource *object; - HRESULT hr; -@@ -4001,11 +4466,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device9 *iface, - ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, - const WCHAR *name, HANDLE *handle) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", - iface, object, attributes, (uint32_t)access, debugstr_w(name, device->wchar_size), handle); -@@ -4013,7 +4478,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 * - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device9 *iface, - HANDLE handle, REFIID riid, void **object) - { - FIXME("iface %p, handle %p, riid %s, object %p stub!\n", -@@ -4022,10 +4487,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device7 *if - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device9 *iface, - const WCHAR *name, DWORD access, HANDLE *handle) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - FIXME("iface %p, name %s, access %#x, handle %p stub!\n", - iface, debugstr_w(name, device->wchar_size), (uint32_t)access, handle); -@@ -4033,7 +4498,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device9 *iface, - UINT object_count, ID3D12Pageable * const *objects) - { - ID3D12Fence *fence; -@@ -4041,17 +4506,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, - - TRACE("iface %p, object_count %u, objects %p.\n", iface, object_count, objects); - -- if (FAILED(hr = ID3D12Device7_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) -+ if (FAILED(hr = ID3D12Device9_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) - return hr; - -- hr = ID3D12Device7_EnqueueMakeResident(iface, 0, object_count, objects, fence, 1); -+ hr = ID3D12Device9_EnqueueMakeResident(iface, 0, object_count, objects, fence, 1); - if (SUCCEEDED(hr)) - ID3D12Fence_SetEventOnCompletion(fence, 1, NULL); - ID3D12Fence_Release(fence); - return hr; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device9 *iface, - UINT object_count, ID3D12Pageable * const *objects) - { - FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", -@@ -4060,10 +4525,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 *iface, - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, 
-+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device9 *iface, - UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_fence *object; - HRESULT hr; - -@@ -4076,9 +4541,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, - return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device7 *iface) -+static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device9 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p.\n", iface); - -@@ -4163,12 +4628,12 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, - *total_bytes = total; - } - --static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device9 *iface, - const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, - UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, - UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - D3D12_RESOURCE_DESC1 resource_desc; - - TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " -@@ -4182,10 +4647,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 * - base_offset, layouts, row_counts, row_sizes, total_bytes); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device9 *iface, - const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_query_heap *object; - HRESULT hr; - -@@ -4198,18 +4663,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *ifa - return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device7 *iface, BOOL enable) -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device9 *iface, BOOL enable) - { - FIXME("iface %p, enable %#x stub!\n", iface, enable); - - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device9 *iface, - const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, - REFIID iid, void **command_signature) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_command_signature *object; - HRESULT hr; - -@@ -4223,14 +4688,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic - &IID_ID3D12CommandSignature, iid, command_signature); - } - --static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 *iface, -+static void 
STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device9 *iface, - ID3D12Resource *resource, UINT *total_tile_count, - D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, - UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, - D3D12_SUBRESOURCE_TILING *sub_resource_tilings) - { - const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " - "standard_title_shape %p, sub_resource_tiling_count %p, " -@@ -4243,9 +4708,9 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 *ifac - sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); - } - --static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface, LUID *luid) -+static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device9 *iface, LUID *luid) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, luid %p.\n", iface, luid); - -@@ -4254,7 +4719,7 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface - return luid; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device9 *iface, - const void *blob, SIZE_T blob_size, REFIID iid, void **lib) - { - FIXME("iface %p, blob %p, blob_size %"PRIuPTR", iid %s, lib %p stub!\n", -@@ -4263,7 +4728,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device - return DXGI_ERROR_UNSUPPORTED; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device9 *iface, - ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, - D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) - { -@@ -4273,7 +4738,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion( - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device9 *iface, - UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) - { - FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); -@@ -4281,10 +4746,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device9 *iface, - const D3D12_PIPELINE_STATE_STREAM_DESC *desc, REFIID iid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_pipeline_state *object; - HRESULT hr; - -@@ -4296,7 +4761,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 - return return_interface(&object->ID3D12PipelineState_iface, &IID_ID3D12PipelineState, iid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE 
d3d12_device_OpenExistingHeapFromAddress(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12Device9 *iface, - const void *address, REFIID iid, void **heap) - { - FIXME("iface %p, address %p, iid %s, heap %p stub!\n", iface, address, debugstr_guid(iid), heap); -@@ -4304,7 +4769,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12 - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device9 *iface, - HANDLE file_mapping, REFIID iid, void **heap) - { - FIXME("iface %p, file_mapping %p, iid %s, heap %p stub!\n", iface, file_mapping, debugstr_guid(iid), heap); -@@ -4312,7 +4777,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device9 *iface, - D3D12_RESIDENCY_FLAGS flags, UINT num_objects, ID3D12Pageable *const *objects, - ID3D12Fence *fence, UINT64 fence_value) - { -@@ -4323,7 +4788,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device9 *iface, - UINT node_mask, D3D12_COMMAND_LIST_TYPE type, D3D12_COMMAND_LIST_FLAGS flags, - REFIID iid, void **command_list) - { -@@ -4333,7 +4798,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 * - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device9 *iface, - const D3D12_PROTECTED_RESOURCE_SESSION_DESC *desc, REFIID iid, void **session) - { - FIXME("iface %p, desc %p, iid %s, session %p stub!\n", iface, desc, debugstr_guid(iid), session); -@@ -4341,13 +4806,13 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3 - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device9 *iface, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, - ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - D3D12_RESOURCE_DESC1 resource_desc; - struct d3d12_resource *object; - HRESULT hr; -@@ -4369,11 +4834,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Dev - return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device9 *iface, - const D3D12_HEAP_DESC *desc, ID3D12ProtectedResourceSession *protected_session, - REFIID iid, void **heap) - { -- struct d3d12_device *device = 
impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - struct d3d12_heap *object; - HRESULT hr; - -@@ -4389,7 +4854,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, - return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device9 *iface, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, - ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) -@@ -4403,11 +4868,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Devi - } - - static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo1( -- ID3D12Device7 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, -+ ID3D12Device9 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, - UINT count, const D3D12_RESOURCE_DESC *resource_descs, - D3D12_RESOURCE_ALLOCATION_INFO1 *info1) - { -- struct d3d12_device *device = impl_from_ID3D12Device7(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); - - TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", - iface, info, visible_mask, count, resource_descs, info1); -@@ -4419,7 +4884,7 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour - return info; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device9 *iface, - ID3D12LifetimeOwner *owner, REFIID iid, void **tracker) - { - FIXME("iface %p, owner %p, iid %s, tracker %p stub!\n", iface, owner, debugstr_guid(iid), tracker); -@@ -4427,12 +4892,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device - return E_NOTIMPL; - } - --static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device7 *iface) -+static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device9 *iface) - { - FIXME("iface %p stub!\n", iface); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device9 *iface, - UINT *num_meta_commands, D3D12_META_COMMAND_DESC *command_desc) - { - FIXME("iface %p, num_meta_commands %p, command_desc %p stub!\n", iface, -@@ -4441,7 +4906,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device9 *iface, - REFGUID command_id, D3D12_META_COMMAND_PARAMETER_STAGE stage, - UINT *size_in_bytes, UINT *parameter_count, - D3D12_META_COMMAND_PARAMETER_DESC *parameter_desc) -@@ -4453,7 +4918,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3 - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device9 *iface, - REFGUID command_id, UINT node_mask, const void *parameters_data, - SIZE_T data_size_in_bytes, REFIID iid, void 
**meta_command) - { -@@ -4465,7 +4930,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *i - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device9 *iface, - const D3D12_STATE_OBJECT_DESC *desc, REFIID iid, void **state_object) - { - FIXME("iface %p, desc %p, iid %s, state_object %p stub!\n", iface, desc, debugstr_guid(iid), state_object); -@@ -4473,14 +4938,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device7 *i - return E_NOTIMPL; - } - --static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device7 *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device9 *iface, - const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc, - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO *info) - { - FIXME("iface %p, desc %p, info %p stub!\n", iface, desc, info); - } - --static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device7 *iface, -+static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device9 *iface, - D3D12_SERIALIZED_DATA_TYPE data_type, const D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER *identifier) - { - FIXME("iface %p, data_type %u, identifier %p stub!\n", iface, data_type, identifier); -@@ -4488,7 +4953,7 @@ static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_Ch - return D3D12_DRIVER_MATCHING_IDENTIFIER_UNRECOGNIZED; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device9 *iface, - D3D12_BACKGROUND_PROCESSING_MODE mode, D3D12_MEASUREMENTS_ACTION action, HANDLE event, - BOOL *further_measurements_desired) - { -@@ -4498,7 +4963,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12 - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *iface, -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device9 *iface, - const D3D12_STATE_OBJECT_DESC *addition, ID3D12StateObject *state_object_to_grow_from, - REFIID riid, void **new_state_object) - { -@@ -4508,7 +4974,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *if - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device7 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device9 *iface, - const D3D12_PROTECTED_RESOURCE_SESSION_DESC1 *desc, REFIID riid, void **session) - { - FIXME("iface %p, desc %p, riid %s, session %p stub!\n", iface, desc, debugstr_guid(riid), session); -@@ -4516,7 +4982,167 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID - return E_NOTIMPL; - } - --static const struct ID3D12Device7Vtbl d3d12_device_vtbl = -+static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo2(ID3D12Device9 *iface, -+ D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, -+ const D3D12_RESOURCE_DESC1 *resource_descs, D3D12_RESOURCE_ALLOCATION_INFO1 *info1) -+{ -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); -+ -+ 
TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", -+ iface, info, visible_mask, count, resource_descs, info1); -+ -+ debug_ignored_node_mask(visible_mask); -+ -+ d3d12_device_get_resource1_allocation_info(device, info1, count, resource_descs, info); -+ -+ return info; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2(ID3D12Device9 *iface, -+ const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC1 *desc, -+ D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, -+ ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) -+{ -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); -+ struct d3d12_resource *object; -+ HRESULT hr; -+ -+ TRACE("iface %p, heap_properties %p, heap_flags %#x, desc %p, initial_state %#x, " -+ "optimized_clear_value %p, protected_session %p, iid %s, resource %p.\n", -+ iface, heap_properties, heap_flags, desc, initial_state, -+ optimized_clear_value, protected_session, debugstr_guid(iid), resource); -+ -+ if (FAILED(hr = d3d12_committed_resource_create(device, heap_properties, heap_flags, -+ desc, initial_state, optimized_clear_value, protected_session, &object))) -+ { -+ *resource = NULL; -+ return hr; -+ } -+ -+ return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource1(ID3D12Device9 *iface, -+ ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC1 *resource_desc, -+ D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, -+ REFIID iid, void **resource) -+{ -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); -+ struct d3d12_heap *heap_object; -+ struct d3d12_resource *object; -+ HRESULT hr; -+ -+ TRACE("iface %p, heap %p, heap_offset %#"PRIx64", desc %p, initial_state %#x, " -+ "optimized_clear_value %p, iid %s, resource %p.\n", -+ iface, heap, heap_offset, resource_desc, initial_state, -+ optimized_clear_value, debugstr_guid(iid), resource); -+ -+ heap_object = unsafe_impl_from_ID3D12Heap(heap); -+ -+ if (FAILED(hr = d3d12_placed_resource_create(device, heap_object, heap_offset, -+ resource_desc, initial_state, optimized_clear_value, &object))) -+ return hr; -+ -+ return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); -+} -+ -+static void STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView(ID3D12Device9 *iface, -+ ID3D12Resource *target_resource, ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) -+{ -+ FIXME("iface %p, target_resource %p, feedback_resource %p, descriptor %s stub!\n", -+ iface, target_resource, feedback_resource, debug_cpu_handle(descriptor)); -+} -+ -+static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints1(ID3D12Device9 *iface, -+ const D3D12_RESOURCE_DESC1 *desc, UINT first_sub_resource, UINT sub_resource_count, -+ UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, -+ UINT64 *row_sizes, UINT64 *total_bytes) -+{ -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); -+ -+ TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " -+ "layouts %p, row_counts %p, row_sizes %p, total_bytes %p.\n", -+ iface, desc, first_sub_resource, sub_resource_count, base_offset, -+ layouts, row_counts, row_sizes, total_bytes); -+ -+ 
d3d12_device_get_copyable_footprints(device, desc, first_sub_resource, sub_resource_count, -+ base_offset, layouts, row_counts, row_sizes, total_bytes); -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateShaderCacheSession(ID3D12Device9 *iface, -+ const D3D12_SHADER_CACHE_SESSION_DESC *desc, REFIID iid, void **session) -+{ -+ struct d3d12_device *device = impl_from_ID3D12Device9(iface); -+ struct d3d12_cache_session *object; -+ static const GUID guid_null = {0}; -+ HRESULT hr; -+ -+ static const UINT valid_flags = D3D12_SHADER_CACHE_FLAG_DRIVER_VERSIONED -+ | D3D12_SHADER_CACHE_FLAG_USE_WORKING_DIR; -+ -+ TRACE("iface %p, desc %p, iid %s, session %p.\n", iface, desc, debugstr_guid(iid), session); -+ -+ if (!desc || !memcmp(&desc->Identifier, &guid_null, sizeof(desc->Identifier))) -+ { -+ WARN("No description or identifier, returning E_INVALIDARG.\n"); -+ return E_INVALIDARG; -+ } -+ if (desc->MaximumValueFileSizeBytes > 1024 * 1024 * 1024) -+ { -+ WARN("Requested size is larger than 1GiB, returning E_INVALIDARG.\n"); -+ return E_INVALIDARG; -+ } -+ if (desc->Flags & ~valid_flags) -+ { -+ WARN("Invalid flags %#x, returning E_INVALIDARG.\n", desc->Flags); -+ return E_INVALIDARG; -+ } -+ if (desc->Mode != D3D12_SHADER_CACHE_MODE_MEMORY && desc->Mode != D3D12_SHADER_CACHE_MODE_DISK) -+ { -+ WARN("Invalid mode %#x, returning E_INVALIDARG.\n", desc->Mode); -+ return E_INVALIDARG; -+ } -+ if (!session) -+ { -+ WARN("No output pointer, returning S_FALSE.\n"); -+ return S_FALSE; -+ } -+ *session = NULL; -+ -+ if (!(object = vkd3d_malloc(sizeof(*object)))) -+ return E_OUTOFMEMORY; -+ -+ if (FAILED(hr = d3d12_cache_session_init(object, device, desc))) -+ { -+ vkd3d_free(object); -+ return hr; -+ } -+ -+ hr = ID3D12ShaderCacheSession_QueryInterface(&object->ID3D12ShaderCacheSession_iface, iid, -+ session); -+ ID3D12ShaderCacheSession_Release(&object->ID3D12ShaderCacheSession_iface); -+ return hr; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_ShaderCacheControl(ID3D12Device9 *iface, -+ D3D12_SHADER_CACHE_KIND_FLAGS kinds, D3D12_SHADER_CACHE_CONTROL_FLAGS control) -+{ -+ FIXME("iface %p, kinds %#x control %#x stub!\n", iface, kinds, control); -+ -+ return E_NOTIMPL; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue1(ID3D12Device9 *iface, -+ const D3D12_COMMAND_QUEUE_DESC *desc, REFIID creator_id, REFIID iid, -+ void **command_queue) -+{ -+ FIXME("iface %p, desc %p, creator %s, iid %s, queue %p stub!\n", iface, desc, -+ debugstr_guid(creator_id), debugstr_guid(iid), command_queue); -+ -+ return E_NOTIMPL; -+} -+ -+static const struct ID3D12Device9Vtbl d3d12_device_vtbl = - { - /* IUnknown methods */ - d3d12_device_QueryInterface, -@@ -4596,14 +5222,24 @@ static const struct ID3D12Device7Vtbl d3d12_device_vtbl = - /* ID3D12Device7 methods */ - d3d12_device_AddToStateObject, - d3d12_device_CreateProtectedResourceSession1, -+ /* ID3D12Device8 methods */ -+ d3d12_device_GetResourceAllocationInfo2, -+ d3d12_device_CreateCommittedResource2, -+ d3d12_device_CreatePlacedResource1, -+ d3d12_device_CreateSamplerFeedbackUnorderedAccessView, -+ d3d12_device_GetCopyableFootprints1, -+ /* ID3D12Device9 methods */ -+ d3d12_device_CreateShaderCacheSession, -+ d3d12_device_ShaderCacheControl, -+ d3d12_device_CreateCommandQueue1, - }; - --struct d3d12_device *unsafe_impl_from_ID3D12Device7(ID3D12Device7 *iface) -+struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface) - { - if (!iface) - return NULL; - assert(iface->lpVtbl == &d3d12_device_vtbl); -- return 
impl_from_ID3D12Device7(iface); -+ return impl_from_ID3D12Device9(iface); - } - - static void *device_worker_main(void *arg) -@@ -4646,13 +5282,15 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, - const struct vkd3d_vk_device_procs *vk_procs; - HRESULT hr; - -- device->ID3D12Device7_iface.lpVtbl = &d3d12_device_vtbl; -+ device->ID3D12Device9_iface.lpVtbl = &d3d12_device_vtbl; - device->refcount = 1; - - vkd3d_instance_incref(device->vkd3d_instance = instance); - device->vk_info = instance->vk_info; - device->signal_event = instance->signal_event; - device->wchar_size = instance->wchar_size; -+ device->environment = (instance->vk_api_version >= VK_API_VERSION_1_1) -+ ? VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1 : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; - - device->adapter_luid = create_info->adapter_luid; - device->removed_reason = S_OK; -@@ -4894,28 +5532,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha - - IUnknown *vkd3d_get_device_parent(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); - - return d3d12_device->parent; - } - - VkDevice vkd3d_get_vk_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); - - return d3d12_device->vk_device; - } - - VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); - - return d3d12_device->vk_physical_device; - } - - struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device9((ID3D12Device9 *)device); - - return d3d12_device->vkd3d_instance; - } -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 89764d0901d..179999148bc 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -1857,6 +1857,7 @@ static bool d3d12_resource_validate_texture_alignment(const D3D12_RESOURCE_DESC1 - - HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device) - { -+ const D3D12_MIP_REGION *mip_region = &desc->SamplerFeedbackMipRegion; - const struct vkd3d_format *format; - - switch (desc->Dimension) -@@ -1892,6 +1893,13 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 - WARN("Invalid sample count 0.\n"); - return E_INVALIDARG; - } -+ if (desc->SampleDesc.Count > 1 -+ && !(desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))) -+ { -+ WARN("Sample count %u invalid without ALLOW_RENDER_TARGET or ALLOW_DEPTH_STENCIL.\n", -+ desc->SampleDesc.Count); -+ return E_INVALIDARG; -+ } - - if (!(format = vkd3d_format_from_d3d12_resource_desc(device, desc, 0))) - { -@@ -1926,6 +1934,12 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 - - d3d12_validate_resource_flags(desc->Flags); - -+ if (mip_region->Width && mip_region->Height && mip_region->Depth) -+ { -+ FIXME("Unhandled sampler feedback mip region size (%u, %u, %u).\n", mip_region->Width, 
mip_region->Height, -+ mip_region->Depth); -+ } -+ - return S_OK; - } - -@@ -1989,6 +2003,11 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 - WARN("Invalid initial resource state %#x.\n", initial_state); - return E_INVALIDARG; - } -+ if (initial_state == D3D12_RESOURCE_STATE_RENDER_TARGET && !(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) -+ { -+ WARN("Invalid initial resource state %#x for non-render-target.\n", initial_state); -+ return E_INVALIDARG; -+ } - - if (optimized_clear_value && d3d12_resource_is_buffer(resource)) - { -@@ -2253,7 +2272,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, - HRESULT vkd3d_create_image_resource(ID3D12Device *device, - const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) - { -- struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device7((ID3D12Device7 *)device); -+ struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device9((ID3D12Device9 *)device); - struct d3d12_resource *object; - HRESULT hr; - -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 08cc110e8f7..199d8043ffe 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -2045,6 +2045,9 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState - - d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); - -+ if (state->implicit_root_signature) -+ d3d12_root_signature_Release(state->implicit_root_signature); -+ - vkd3d_free(state); - - d3d12_device_release(device); -@@ -2156,6 +2159,8 @@ static unsigned int feature_flags_compile_option(const struct d3d12_device *devi - flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_INT64; - if (device->feature_options.DoublePrecisionFloatShaderOps) - flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLOAT64; -+ if (device->feature_options1.WaveOps) -+ flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS; - - return flags; - } -@@ -2413,8 +2418,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - struct vkd3d_shader_interface_info shader_interface; - struct vkd3d_shader_descriptor_offset_info offset_info; -- const struct d3d12_root_signature *root_signature; - struct vkd3d_shader_spirv_target_info target_info; -+ struct d3d12_root_signature *root_signature; - VkPipelineLayout vk_pipeline_layout; - HRESULT hr; - -@@ -2425,17 +2430,31 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st - - if (!(root_signature = unsafe_impl_from_ID3D12RootSignature(desc->root_signature))) - { -- WARN("Root signature is NULL.\n"); -- return E_INVALIDARG; -+ TRACE("Root signature is NULL, looking for an embedded signature.\n"); -+ if (FAILED(hr = d3d12_root_signature_create(device, -+ desc->cs.pShaderBytecode, desc->cs.BytecodeLength, &root_signature))) -+ { -+ WARN("Failed to find an embedded root signature, hr %s.\n", debugstr_hresult(hr)); -+ return hr; -+ } -+ state->implicit_root_signature = &root_signature->ID3D12RootSignature_iface; -+ } -+ else -+ { -+ state->implicit_root_signature = NULL; - } - - if (FAILED(hr = d3d12_pipeline_state_find_and_init_uav_counters(state, device, root_signature, - &desc->cs, VK_SHADER_STAGE_COMPUTE_BIT))) -+ { -+ if (state->implicit_root_signature) -+ d3d12_root_signature_Release(state->implicit_root_signature); - return hr; -+ } - - memset(&target_info, 0, sizeof(target_info)); - target_info.type = 
VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; -- target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; -+ target_info.environment = device->environment; - target_info.extensions = device->vk_info.shader_extensions; - target_info.extension_count = device->vk_info.shader_extension_count; - -@@ -2476,6 +2495,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st - { - WARN("Failed to create Vulkan compute pipeline, hr %s.\n", debugstr_hresult(hr)); - d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); -+ if (state->implicit_root_signature) -+ d3d12_root_signature_Release(state->implicit_root_signature); - return hr; - } - -@@ -2483,6 +2504,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st - { - VK_CALL(vkDestroyPipeline(device->vk_device, state->u.compute.vk_pipeline, NULL)); - d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); -+ if (state->implicit_root_signature) -+ d3d12_root_signature_Release(state->implicit_root_signature); - return hr; - } - -@@ -3156,7 +3179,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s - ps_target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; - ps_target_info.next = NULL; - ps_target_info.entry_point = "main"; -- ps_target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; -+ ps_target_info.environment = device->environment; - ps_target_info.extensions = vk_info->shader_extensions; - ps_target_info.extension_count = vk_info->shader_extension_count; - ps_target_info.parameters = ps_shader_parameters; -@@ -3186,7 +3209,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s - - memset(&target_info, 0, sizeof(target_info)); - target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; -- target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; -+ target_info.environment = device->environment; - target_info.extensions = vk_info->shader_extensions; - target_info.extension_count = vk_info->shader_extension_count; - -@@ -3484,6 +3507,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s - goto fail; - - state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; -+ state->implicit_root_signature = NULL; - d3d12_device_add_ref(state->device = device); - - return S_OK; -diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c -index ac79ae5ddff..11029c9f5f9 100644 ---- a/libs/vkd3d/libs/vkd3d/utils.c -+++ b/libs/vkd3d/libs/vkd3d/utils.c -@@ -87,6 +87,8 @@ static const struct vkd3d_format vkd3d_formats[] = - {DXGI_FORMAT_R8_SNORM, VK_FORMAT_R8_SNORM, 1, 1, 1, 1, COLOR, 1}, - {DXGI_FORMAT_R8_SINT, VK_FORMAT_R8_SINT, 1, 1, 1, 1, COLOR, 1, SINT}, - {DXGI_FORMAT_A8_UNORM, VK_FORMAT_R8_UNORM, 1, 1, 1, 1, COLOR, 1}, -+ {DXGI_FORMAT_B5G6R5_UNORM, VK_FORMAT_R5G6B5_UNORM_PACK16, 2, 1, 1, 1, COLOR, 1}, -+ {DXGI_FORMAT_B5G5R5A1_UNORM, VK_FORMAT_A1R5G5B5_UNORM_PACK16, 2, 1, 1, 1, COLOR, 1}, - {DXGI_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, - {DXGI_FORMAT_B8G8R8X8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1}, - {DXGI_FORMAT_B8G8R8A8_TYPELESS, VK_FORMAT_B8G8R8A8_UNORM, 4, 1, 1, 1, COLOR, 1, TYPELESS}, -@@ -116,6 +118,9 @@ static const struct vkd3d_format vkd3d_formats[] = - {DXGI_FORMAT_BC7_UNORM_SRGB, VK_FORMAT_BC7_SRGB_BLOCK, 1, 4, 4, 16, COLOR, 1}, - }; - -+static const struct vkd3d_format format_b4g4r4a4 = -+ {DXGI_FORMAT_B4G4R4A4_UNORM, 
VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, 2, 1, 1, 1, COLOR, 1}; -+ - /* Each depth/stencil format is only compatible with itself in Vulkan. */ - static const struct vkd3d_format vkd3d_depth_stencil_formats[] = - { -@@ -449,6 +454,11 @@ const struct vkd3d_format *vkd3d_get_format(const struct d3d12_device *device, - return &vkd3d_formats[i]; - } - -+ /* Do not check VkPhysicalDevice4444FormatsFeaturesEXT because apps -+ * should query format support, which returns more detailed info. */ -+ if (dxgi_format == format_b4g4r4a4.dxgi_format && device->vk_info.EXT_4444_formats) -+ return &format_b4g4r4a4; -+ - return NULL; - } - -@@ -891,6 +901,30 @@ bool vkd3d_get_program_name(char program_name[PATH_MAX]) - return true; - } - -+#elif defined(WIN32) -+ -+bool vkd3d_get_program_name(char program_name[PATH_MAX]) -+{ -+ char buffer[MAX_PATH]; -+ char *p, *name; -+ size_t len; -+ -+ *program_name = '\0'; -+ len = GetModuleFileNameA(NULL, buffer, ARRAY_SIZE(buffer)); -+ if (!(len && len < MAX_PATH)) -+ return false; -+ -+ name = buffer; -+ if ((p = strrchr(name, '/'))) -+ name = p + 1; -+ if ((p = strrchr(name, '\\'))) -+ name = p + 1; -+ -+ len = strlen(name) + 1; -+ memcpy(program_name, name, len); -+ return true; -+} -+ - #else - - bool vkd3d_get_program_name(char program_name[PATH_MAX]) -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -index 7919b7d8760..29305fbdc63 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -@@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, - - if (!device) - { -- ID3D12Device_Release(&object->ID3D12Device7_iface); -+ ID3D12Device_Release(&object->ID3D12Device9_iface); - return S_FALSE; - } - -- return return_interface(&object->ID3D12Device7_iface, &IID_ID3D12Device, iid, device); -+ return return_interface(&object->ID3D12Device9_iface, &IID_ID3D12Device, iid, device); - } - - /* ID3D12RootSignatureDeserializer */ -@@ -453,11 +453,10 @@ HRESULT vkd3d_serialize_root_signature(const D3D12_ROOT_SIGNATURE_DESC *desc, - if ((ret = vkd3d_shader_serialize_root_signature(&vkd3d_desc, &dxbc, &messages)) < 0) - { - WARN("Failed to serialize root signature, vkd3d result %d.\n", ret); -- if (error_blob && messages) -- { -- if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) -- ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); -- } -+ if (!error_blob) -+ vkd3d_shader_free_messages(messages); -+ else if (messages && FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) -+ ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); - return hresult_from_vkd3d_result(ret); - } - vkd3d_shader_free_messages(messages); -@@ -494,11 +493,10 @@ HRESULT vkd3d_serialize_versioned_root_signature(const D3D12_VERSIONED_ROOT_SIGN - if ((ret = vkd3d_shader_serialize_root_signature(vkd3d_desc, &dxbc, &messages)) < 0) - { - WARN("Failed to serialize root signature, vkd3d result %d.\n", ret); -- if (error_blob && messages) -- { -- if (FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) -- ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); -- } -+ if (!error_blob) -+ vkd3d_shader_free_messages(messages); -+ else if (messages && FAILED(hr = vkd3d_blob_create(messages, strlen(messages), error_blob))) -+ ERR("Failed to create error blob, hr %s.\n", debugstr_hresult(hr)); - return hresult_from_vkd3d_result(ret); - } - vkd3d_shader_free_messages(messages); -diff --git 
a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index b092bb26ded..5f60c8d90ad 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -24,10 +24,6 @@ - #define VK_NO_PROTOTYPES - #define CONST_VTABLE - --#ifdef _WIN32 --# define _WIN32_WINNT 0x0600 /* for condition variables */ --#endif -- - #include "vkd3d_common.h" - #include "vkd3d_blob.h" - #include "vkd3d_memory.h" -@@ -55,7 +51,7 @@ - - #define VKD3D_MAX_COMPATIBLE_FORMAT_COUNT 6u - #define VKD3D_MAX_QUEUE_FAMILY_COUNT 3u --#define VKD3D_MAX_SHADER_EXTENSIONS 4u -+#define VKD3D_MAX_SHADER_EXTENSIONS 5u - #define VKD3D_MAX_SHADER_STAGES 5u - #define VKD3D_MAX_VK_SYNC_OBJECTS 4u - #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u -@@ -128,11 +124,13 @@ struct vkd3d_vulkan_info - bool KHR_sampler_mirror_clamp_to_edge; - bool KHR_timeline_semaphore; - /* EXT device extensions */ -+ bool EXT_4444_formats; - bool EXT_calibrated_timestamps; - bool EXT_conditional_rendering; - bool EXT_debug_marker; - bool EXT_depth_clip_enable; - bool EXT_descriptor_indexing; -+ bool EXT_fragment_shader_interlock; - bool EXT_mutable_descriptor_type; - bool EXT_robustness2; - bool EXT_shader_demote_to_helper_invocation; -@@ -184,6 +182,7 @@ struct vkd3d_instance - struct vkd3d_vulkan_info vk_info; - struct vkd3d_vk_global_procs vk_global_procs; - void *libvulkan; -+ uint32_t vk_api_version; - - uint64_t config_flags; - enum vkd3d_api_version api_version; -@@ -202,61 +201,6 @@ union vkd3d_thread_handle - void *handle; - }; - --struct vkd3d_mutex --{ -- CRITICAL_SECTION lock; --}; -- --struct vkd3d_cond --{ -- CONDITION_VARIABLE cond; --}; -- --static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) --{ -- InitializeCriticalSection(&lock->lock); --} -- --static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) --{ -- EnterCriticalSection(&lock->lock); --} -- --static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) --{ -- LeaveCriticalSection(&lock->lock); --} -- --static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) --{ -- DeleteCriticalSection(&lock->lock); --} -- --static inline void vkd3d_cond_init(struct vkd3d_cond *cond) --{ -- InitializeConditionVariable(&cond->cond); --} -- --static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) --{ -- WakeConditionVariable(&cond->cond); --} -- --static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) --{ -- WakeAllConditionVariable(&cond->cond); --} -- --static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) --{ -- if (!SleepConditionVariableCS(&cond->cond, &lock->lock, INFINITE)) -- ERR("Could not sleep on the condition variable, error %lu.\n", GetLastError()); --} -- --static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) --{ --} -- - static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) - { - return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; -@@ -287,98 +231,6 @@ union vkd3d_thread_handle - void *handle; - }; - --struct vkd3d_mutex --{ -- pthread_mutex_t lock; --}; -- --struct vkd3d_cond --{ -- pthread_cond_t cond; --}; -- -- --static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) --{ -- int ret; -- -- ret = pthread_mutex_init(&lock->lock, NULL); -- if (ret) -- ERR("Could not initialize the mutex, error %d.\n", ret); --} -- --static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) --{ -- int ret; -- -- ret = pthread_mutex_lock(&lock->lock); -- if (ret) -- 
ERR("Could not lock the mutex, error %d.\n", ret); --} -- --static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) --{ -- int ret; -- -- ret = pthread_mutex_unlock(&lock->lock); -- if (ret) -- ERR("Could not unlock the mutex, error %d.\n", ret); --} -- --static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) --{ -- int ret; -- -- ret = pthread_mutex_destroy(&lock->lock); -- if (ret) -- ERR("Could not destroy the mutex, error %d.\n", ret); --} -- --static inline void vkd3d_cond_init(struct vkd3d_cond *cond) --{ -- int ret; -- -- ret = pthread_cond_init(&cond->cond, NULL); -- if (ret) -- ERR("Could not initialize the condition variable, error %d.\n", ret); --} -- --static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) --{ -- int ret; -- -- ret = pthread_cond_signal(&cond->cond); -- if (ret) -- ERR("Could not signal the condition variable, error %d.\n", ret); --} -- --static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) --{ -- int ret; -- -- ret = pthread_cond_broadcast(&cond->cond); -- if (ret) -- ERR("Could not broadcast the condition variable, error %d.\n", ret); --} -- --static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) --{ -- int ret; -- -- ret = pthread_cond_wait(&cond->cond, &lock->lock); -- if (ret) -- ERR("Could not wait on the condition variable, error %d.\n", ret); --} -- --static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) --{ -- int ret; -- -- ret = pthread_cond_destroy(&cond->cond); -- if (ret) -- ERR("Could not destroy the condition variable, error %d.\n", ret); --} -- - # if HAVE_SYNC_BOOL_COMPARE_AND_SWAP - static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) - { -@@ -1279,6 +1131,7 @@ struct d3d12_pipeline_state - - struct d3d12_pipeline_uav_counter_state uav_counters; - -+ ID3D12RootSignature *implicit_root_signature; - struct d3d12_device *device; - - struct vkd3d_private_store private_store; -@@ -1735,7 +1588,7 @@ struct vkd3d_desc_object_cache - /* ID3D12Device */ - struct d3d12_device - { -- ID3D12Device7 ID3D12Device7_iface; -+ ID3D12Device9 ID3D12Device9_iface; - unsigned int refcount; - - VkDevice vk_device; -@@ -1743,6 +1596,7 @@ struct d3d12_device - struct vkd3d_vk_device_procs vk_procs; - PFN_vkd3d_signal_event signal_event; - size_t wchar_size; -+ enum vkd3d_shader_spirv_environment environment; - - struct vkd3d_gpu_va_allocator gpu_va_allocator; - -@@ -1810,29 +1664,29 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 - bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); - void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, - const char *message, ...) 
VKD3D_PRINTF_FUNC(3, 4); --struct d3d12_device *unsafe_impl_from_ID3D12Device7(ID3D12Device7 *iface); -+struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface); - HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); - void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); - - static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) - { -- return ID3D12Device7_QueryInterface(&device->ID3D12Device7_iface, iid, object); -+ return ID3D12Device9_QueryInterface(&device->ID3D12Device9_iface, iid, object); - } - - static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) - { -- return ID3D12Device7_AddRef(&device->ID3D12Device7_iface); -+ return ID3D12Device9_AddRef(&device->ID3D12Device9_iface); - } - - static inline ULONG d3d12_device_release(struct d3d12_device *device) - { -- return ID3D12Device7_Release(&device->ID3D12Device7_iface); -+ return ID3D12Device9_Release(&device->ID3D12Device9_iface); - } - - static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) - { -- return ID3D12Device7_GetDescriptorHandleIncrementSize(&device->ID3D12Device7_iface, descriptor_type); -+ return ID3D12Device9_GetDescriptorHandleIncrementSize(&device->ID3D12Device9_iface, descriptor_type); - } - - /* utils */ -@@ -1993,4 +1847,10 @@ static inline void vkd3d_prepend_struct(void *header, void *structure) - vkd3d_header->next = vkd3d_structure; - } - -+struct vkd3d_shader_cache; -+ -+int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache); -+unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache); -+unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache); -+ - #endif /* __VKD3D_PRIVATE_H */ --- -2.43.0 - diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-a64426edf9fa0e36cbe982656dc606bf1e0.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-a64426edf9fa0e36cbe982656dc606bf1e0.patch deleted file mode 100644 index 8a1cc764..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-a64426edf9fa0e36cbe982656dc606bf1e0.patch +++ /dev/null @@ -1,982 +0,0 @@ -From 0d623c008de7908d9e97843e44278a6f8443c85c Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 23 May 2024 07:37:44 +1000 -Subject: [PATCH] Updated vkd3d to a64426edf9fa0e36cbe982656dc606bf1e02822f. 
- ---- - libs/vkd3d/include/private/vkd3d_common.h | 63 +++++++ - libs/vkd3d/include/vkd3d_types.h | 6 + - libs/vkd3d/libs/vkd3d-common/blob.c | 3 + - libs/vkd3d/libs/vkd3d-common/error.c | 6 + - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 61 ++++++- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 2 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 122 +++++++++++++- - libs/vkd3d/libs/vkd3d/cache.c | 195 ++++++++++++++++++++++ - libs/vkd3d/libs/vkd3d/device.c | 42 ++++- - libs/vkd3d/libs/vkd3d/resource.c | 24 +-- - libs/vkd3d/libs/vkd3d/vkd3d_main.c | 2 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 92 ++-------- - 12 files changed, 501 insertions(+), 117 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index b0e9230dab6..2d950b4f7aa 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -340,6 +340,11 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) - return (x > y) - (x < y); - } - -+static inline int vkd3d_u64_compare(uint64_t x, uint64_t y) -+{ -+ return (x > y) - (x < y); -+} -+ - #define VKD3D_BITMAP_SIZE(x) (((x) + 0x1f) >> 5) - - static inline bool bitmap_clear(uint32_t *map, unsigned int idx) -@@ -431,6 +436,64 @@ static inline uint32_t vkd3d_atomic_increment_u32(uint32_t volatile *x) - return vkd3d_atomic_add_fetch_u32(x, 1); - } - -+static inline bool vkd3d_atomic_compare_exchange_u32(uint32_t volatile *x, uint32_t expected, uint32_t val) -+{ -+#if HAVE_SYNC_BOOL_COMPARE_AND_SWAP -+ return __sync_bool_compare_and_swap(x, expected, val); -+#elif defined(_WIN32) -+ return InterlockedCompareExchange((LONG *)x, val, expected) == expected; -+#else -+# error "vkd3d_atomic_compare_exchange_u32() not implemented for this platform" -+#endif -+} -+ -+static inline bool vkd3d_atomic_compare_exchange_ptr(void * volatile *x, void *expected, void *val) -+{ -+#if HAVE_SYNC_BOOL_COMPARE_AND_SWAP -+ return __sync_bool_compare_and_swap(x, expected, val); -+#elif defined(_WIN32) -+ return InterlockedCompareExchangePointer(x, val, expected) == expected; -+#else -+# error "vkd3d_atomic_compare_exchange_ptr() not implemented for this platform" -+#endif -+} -+ -+static inline uint32_t vkd3d_atomic_exchange_u32(uint32_t volatile *x, uint32_t val) -+{ -+#if HAVE_ATOMIC_EXCHANGE_N -+ return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); -+#elif defined(_WIN32) -+ return InterlockedExchange((LONG *)x, val); -+#else -+ uint32_t expected; -+ -+ do -+ { -+ expected = *x; -+ } while (!vkd3d_atomic_compare_exchange_u32(x, expected, val)); -+ -+ return expected; -+#endif -+} -+ -+static inline void *vkd3d_atomic_exchange_ptr(void * volatile *x, void *val) -+{ -+#if HAVE_ATOMIC_EXCHANGE_N -+ return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); -+#elif defined(_WIN32) -+ return InterlockedExchangePointer(x, val); -+#else -+ void *expected; -+ -+ do -+ { -+ expected = *x; -+ } while (!vkd3d_atomic_compare_exchange_ptr(x, expected, val)); -+ -+ return expected; -+#endif -+} -+ - struct vkd3d_mutex - { - #ifdef _WIN32 -diff --git a/libs/vkd3d/include/vkd3d_types.h b/libs/vkd3d/include/vkd3d_types.h -index 017eaf11806..dc5a7c064ae 100644 ---- a/libs/vkd3d/include/vkd3d_types.h -+++ b/libs/vkd3d/include/vkd3d_types.h -@@ -53,6 +53,12 @@ enum vkd3d_result - VKD3D_ERROR_INVALID_SHADER = -4, - /** The operation is not implemented in this version of vkd3d. */ - VKD3D_ERROR_NOT_IMPLEMENTED = -5, -+ /** The object or entry already exists. 
\since 1.12 */ -+ VKD3D_ERROR_KEY_ALREADY_EXISTS = -6, -+ /** The requested object was not found. \since 1.12 */ -+ VKD3D_ERROR_NOT_FOUND = -7, -+ /** The output buffer is larger than the requested object \since 1.12. */ -+ VKD3D_ERROR_MORE_DATA = -8, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_RESULT), - }; -diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c -index 6bc95dc55c4..c2c6ad67804 100644 ---- a/libs/vkd3d/libs/vkd3d-common/blob.c -+++ b/libs/vkd3d/libs/vkd3d-common/blob.c -@@ -16,6 +16,9 @@ - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -+#ifndef __MINGW32__ -+#define WIDL_C_INLINE_WRAPPERS -+#endif - #define COBJMACROS - - #define CONST_VTABLE -diff --git a/libs/vkd3d/libs/vkd3d-common/error.c b/libs/vkd3d/libs/vkd3d-common/error.c -index b8350a5404c..2f978c4977d 100644 ---- a/libs/vkd3d/libs/vkd3d-common/error.c -+++ b/libs/vkd3d/libs/vkd3d-common/error.c -@@ -35,6 +35,12 @@ HRESULT hresult_from_vkd3d_result(int vkd3d_result) - return E_INVALIDARG; - case VKD3D_ERROR_NOT_IMPLEMENTED: - return E_NOTIMPL; -+ case VKD3D_ERROR_KEY_ALREADY_EXISTS: -+ return DXGI_ERROR_ALREADY_EXISTS; -+ case VKD3D_ERROR_NOT_FOUND: -+ return DXGI_ERROR_NOT_FOUND; -+ case VKD3D_ERROR_MORE_DATA: -+ return DXGI_ERROR_MORE_DATA; - default: - FIXME("Unhandled vkd3d result %d.\n", vkd3d_result); - return E_FAIL; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index cda73d48fc0..58f830dd887 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1786,6 +1786,7 @@ static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) - struct sm1_instruction - { - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; -+ unsigned int flags; - - struct sm1_dst_register - { -@@ -1825,6 +1826,8 @@ static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu - uint32_t token = instr->opcode; - unsigned int i; - -+ token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); -+ - if (ctx->profile->major_version > 1) - token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); -@@ -2387,6 +2390,49 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - } - -+static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_block *block); -+ -+static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_ir_if *iff = hlsl_ir_if(instr); -+ const struct hlsl_ir_node *condition; -+ struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; -+ -+ condition = iff->condition.node; -+ assert(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); -+ -+ sm1_ifc = (struct sm1_instruction) -+ { -+ .opcode = D3DSIO_IFC, -+ .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. 
*/ -+ -+ .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), -+ .srcs[0].reg = condition->reg.id, -+ .srcs[0].mod = 0, -+ -+ .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), -+ .srcs[1].reg = condition->reg.id, -+ .srcs[1].mod = D3DSPSM_NEG, -+ -+ .src_count = 2, -+ }; -+ write_sm1_instruction(ctx, buffer, &sm1_ifc); -+ write_sm1_block(ctx, buffer, &iff->then_block); -+ -+ if (!list_empty(&iff->else_block.instrs)) -+ { -+ sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; -+ write_sm1_instruction(ctx, buffer, &sm1_else); -+ write_sm1_block(ctx, buffer, &iff->else_block); -+ } -+ -+ sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; -+ write_sm1_instruction(ctx, buffer, &sm1_endif); -+} -+ - static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -@@ -2587,12 +2633,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - write_sm1_instruction(ctx, buffer, &sm1_instr); - } - --static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_function_decl *entry_func) -+static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_block *block) - { - const struct hlsl_ir_node *instr; - -- LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { -@@ -2616,6 +2662,13 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - write_sm1_expr(ctx, buffer, instr); - break; - -+ case HLSL_IR_IF: -+ if (hlsl_version_ge(ctx, 2, 1)) -+ write_sm1_if(ctx, buffer, instr); -+ else -+ hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); -+ break; -+ - case HLSL_IR_JUMP: - write_sm1_jump(ctx, buffer, instr); - break; -@@ -2653,7 +2706,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun - write_sm1_constant_defs(ctx, &buffer); - write_sm1_semantic_dcls(ctx, &buffer); - write_sm1_sampler_dcls(ctx, &buffer); -- write_sm1_instructions(ctx, &buffer, entry_func); -+ write_sm1_block(ctx, &buffer, &entry_func->body); - - put_u32(&buffer, D3DSIO_END); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 88b917eff11..a5923d8bf8e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -191,7 +191,7 @@ while {return KW_WHILE; } - %= {return OP_MODASSIGN; } - &= {return OP_ANDASSIGN; } - \|= {return OP_ORASSIGN; } --^= {return OP_XORASSIGN; } -+\^= {return OP_XORASSIGN; } - - {IDENTIFIER} { - struct hlsl_ctx *ctx = yyget_extra(yyscanner); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 9514ddb980f..37818d4dfad 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -83,6 +83,106 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, - return true; - } - -+static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_data_type data_type, -+ enum vkd3d_shader_opcode *opcode, bool *requires_swap) -+{ -+ switch (rel_op) -+ { -+ case VKD3D_SHADER_REL_OP_LT: -+ case VKD3D_SHADER_REL_OP_GT: -+ *requires_swap = (rel_op == 
VKD3D_SHADER_REL_OP_GT); -+ if (data_type == VKD3D_DATA_FLOAT) -+ { -+ *opcode = VKD3DSIH_LTO; -+ return true; -+ } -+ break; -+ -+ case VKD3D_SHADER_REL_OP_GE: -+ case VKD3D_SHADER_REL_OP_LE: -+ *requires_swap = (rel_op == VKD3D_SHADER_REL_OP_LE); -+ if (data_type == VKD3D_DATA_FLOAT) -+ { -+ *opcode = VKD3DSIH_GEO; -+ return true; -+ } -+ break; -+ -+ case VKD3D_SHADER_REL_OP_EQ: -+ *requires_swap = false; -+ if (data_type == VKD3D_DATA_FLOAT) -+ { -+ *opcode = VKD3DSIH_EQO; -+ return true; -+ } -+ break; -+ -+ case VKD3D_SHADER_REL_OP_NE: -+ *requires_swap = false; -+ if (data_type == VKD3D_DATA_FLOAT) -+ { -+ *opcode = VKD3DSIH_NEO; -+ return true; -+ } -+ break; -+ } -+ return false; -+} -+ -+static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, -+ struct vkd3d_shader_instruction *ifc, unsigned int *tmp_idx, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ size_t pos = ifc - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ enum vkd3d_shader_opcode opcode; -+ bool swap; -+ -+ if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (*tmp_idx == ~0u) -+ *tmp_idx = program->temp_count++; -+ -+ /* Replace ifc comparison with actual comparison, saving the result in the tmp register. */ -+ if (!(get_opcode_from_rel_op(ifc->flags, ifc->src[0].reg.data_type, &opcode, &swap))) -+ { -+ vkd3d_shader_error(message_context, &ifc->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Aborting due to not yet implemented feature: opcode for rel_op %u and data type %u.", -+ ifc->flags, ifc->src[0].reg.data_type); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ -+ ins = &instructions->elements[pos + 1]; -+ if (!vsir_instruction_init_with_params(program, ins, &ifc->location, opcode, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].reg.idx[0].offset = *tmp_idx; -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; -+ -+ ins->src[0] = ifc->src[swap]; -+ ins->src[1] = ifc->src[!swap]; -+ -+ /* Create new if instruction using the previous result. 
*/ -+ ins = &instructions->elements[pos + 2]; -+ if (!vsir_instruction_init_with_params(program, ins, &ifc->location, VKD3DSIH_IF, 0, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; -+ -+ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].reg.idx[0].offset = *tmp_idx; -+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ -+ /* Make the original instruction no-op */ -+ vkd3d_shader_instruction_make_nop(ifc); -+ -+ return VKD3D_OK; -+} -+ - static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program, - struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx) - { -@@ -211,7 +311,8 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro - return VKD3D_OK; - } - --static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program) -+static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_instruction_array *instructions = &program->instructions; - unsigned int tmp_idx = ~0u, i; -@@ -223,6 +324,11 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - - switch (ins->handler_idx) - { -+ case VKD3DSIH_IFC: -+ if ((ret = vsir_program_lower_ifc(program, ins, &tmp_idx, message_context)) < 0) -+ return ret; -+ break; -+ - case VKD3DSIH_TEXKILL: - if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0) - return ret; -@@ -4993,12 +5099,12 @@ static void register_map_undominated_use(struct vkd3d_shader_register *reg, stru - { - unsigned int i; - -- if (!register_is_ssa(reg)) -- return; -- -- i = reg->idx[0].offset; -- if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) -- alloc->table[i] = alloc->next_temp_idx++; -+ if (register_is_ssa(reg)) -+ { -+ i = reg->idx[0].offset; -+ if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) -+ alloc->table[i] = alloc->next_temp_idx++; -+ } - - for (i = 0; i < reg->idx_count; ++i) - if (reg->idx[i].rel_addr) -@@ -6080,7 +6186,7 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t - { - enum vkd3d_result result = VKD3D_OK; - -- if ((result = vsir_program_lower_instructions(program)) < 0) -+ if ((result = vsir_program_lower_instructions(program, message_context)) < 0) - return result; - - if (program->shader_version.major >= 6) -diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c -index 56ba6990420..a0a29ed30cb 100644 ---- a/libs/vkd3d/libs/vkd3d/cache.c -+++ b/libs/vkd3d/libs/vkd3d/cache.c -@@ -18,11 +18,60 @@ - - #include "vkd3d_private.h" - -+struct vkd3d_cache_entry_header -+{ -+ uint64_t hash; -+ uint64_t key_size; -+ uint64_t value_size; -+}; -+ - struct vkd3d_shader_cache - { - unsigned int refcount; -+ struct vkd3d_mutex lock; -+ -+ struct rb_tree tree; - }; - -+struct shader_cache_entry -+{ -+ struct vkd3d_cache_entry_header h; -+ struct rb_entry entry; -+ uint8_t *payload; -+}; -+ -+struct shader_cache_key -+{ -+ uint64_t hash; -+ const void *key; -+ uint64_t key_size; -+}; -+ -+static int vkd3d_shader_cache_compare_key(const void *key, const struct rb_entry *entry) -+{ -+ const struct shader_cache_entry *e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); -+ const struct shader_cache_key *k = key; -+ int ret; -+ -+ if ((ret = vkd3d_u64_compare(k->hash, e->h.hash))) -+ return 
ret; -+ if ((ret = vkd3d_u64_compare(k->key_size, e->h.key_size))) -+ return ret; -+ -+ /* Until now we have not seen an actual hash collision. If the key didn't match it was always -+ * due to a bug in the serialization code or memory corruption. If you see this FIXME please -+ * investigate. */ -+ if ((ret = memcmp(k->key, e->payload, k->key_size))) -+ FIXME("Actual case of a hash collision found.\n"); -+ return ret; -+} -+ -+static void vkd3d_shader_cache_add_entry(struct vkd3d_shader_cache *cache, -+ struct shader_cache_entry *e) -+{ -+ rb_put(&cache->tree, &e->h.hash, &e->entry); -+} -+ - int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) - { - struct vkd3d_shader_cache *object; -@@ -34,6 +83,9 @@ int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) - return VKD3D_ERROR_OUT_OF_MEMORY; - - object->refcount = 1; -+ rb_init(&object->tree, vkd3d_shader_cache_compare_key); -+ vkd3d_mutex_init(&object->lock); -+ - *cache = object; - - return VKD3D_OK; -@@ -46,6 +98,13 @@ unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache) - return refcount; - } - -+static void vkd3d_shader_cache_destroy_entry(struct rb_entry *entry, void *context) -+{ -+ struct shader_cache_entry *e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); -+ vkd3d_free(e->payload); -+ vkd3d_free(e); -+} -+ - unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) - { - unsigned int refcount = vkd3d_atomic_decrement_u32(&cache->refcount); -@@ -54,6 +113,142 @@ unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) - if (refcount) - return refcount; - -+ rb_destroy(&cache->tree, vkd3d_shader_cache_destroy_entry, NULL); -+ vkd3d_mutex_destroy(&cache->lock); -+ - vkd3d_free(cache); - return 0; - } -+ -+static uint64_t vkd3d_shader_cache_hash_key(const void *key, size_t size) -+{ -+ static const uint64_t fnv_prime = 0x00000100000001b3; -+ uint64_t hash = 0xcbf29ce484222325; -+ const uint8_t *k = key; -+ size_t i; -+ -+ for (i = 0; i < size; ++i) -+ hash = (hash ^ k[i]) * fnv_prime; -+ -+ return hash; -+} -+ -+static void vkd3d_shader_cache_lock(struct vkd3d_shader_cache *cache) -+{ -+ vkd3d_mutex_lock(&cache->lock); -+} -+ -+static void vkd3d_shader_cache_unlock(struct vkd3d_shader_cache *cache) -+{ -+ vkd3d_mutex_unlock(&cache->lock); -+} -+ -+int vkd3d_shader_cache_put(struct vkd3d_shader_cache *cache, -+ const void *key, size_t key_size, const void *value, size_t value_size) -+{ -+ struct shader_cache_entry *e; -+ struct shader_cache_key k; -+ struct rb_entry *entry; -+ enum vkd3d_result ret; -+ -+ TRACE("%p, %p, %#zx, %p, %#zx.\n", cache, key, key_size, value, value_size); -+ -+ k.hash = vkd3d_shader_cache_hash_key(key, key_size); -+ k.key = key; -+ k.key_size = key_size; -+ -+ vkd3d_shader_cache_lock(cache); -+ -+ entry = rb_get(&cache->tree, &k); -+ e = entry ? 
RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry) : NULL; -+ -+ if (e) -+ { -+ WARN("Key already exists, returning VKD3D_ERROR_KEY_ALREADY_EXISTS.\n"); -+ ret = VKD3D_ERROR_KEY_ALREADY_EXISTS; -+ goto done; -+ } -+ -+ e = vkd3d_malloc(sizeof(*e)); -+ if (!e) -+ { -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto done; -+ } -+ e->payload = vkd3d_malloc(key_size + value_size); -+ if (!e->payload) -+ { -+ vkd3d_free(e); -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto done; -+ } -+ -+ e->h.key_size = key_size; -+ e->h.value_size = value_size; -+ e->h.hash = k.hash; -+ memcpy(e->payload, key, key_size); -+ memcpy(e->payload + key_size, value, value_size); -+ -+ vkd3d_shader_cache_add_entry(cache, e); -+ TRACE("Cache entry %#"PRIx64" stored.\n", k.hash); -+ ret = VKD3D_OK; -+ -+done: -+ vkd3d_shader_cache_unlock(cache); -+ return ret; -+} -+ -+int vkd3d_shader_cache_get(struct vkd3d_shader_cache *cache, -+ const void *key, size_t key_size, void *value, size_t *value_size) -+{ -+ struct shader_cache_entry *e; -+ struct shader_cache_key k; -+ struct rb_entry *entry; -+ enum vkd3d_result ret; -+ size_t size_in; -+ -+ TRACE("%p, %p, %#zx, %p, %p.\n", cache, key, key_size, value, value_size); -+ -+ size_in = *value_size; -+ -+ k.hash = vkd3d_shader_cache_hash_key(key, key_size); -+ k.key = key; -+ k.key_size = key_size; -+ -+ vkd3d_shader_cache_lock(cache); -+ -+ entry = rb_get(&cache->tree, &k); -+ if (!entry) -+ { -+ WARN("Entry not found.\n"); -+ ret = VKD3D_ERROR_NOT_FOUND; -+ goto done; -+ } -+ -+ e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); -+ -+ *value_size = e->h.value_size; -+ if (!value) -+ { -+ TRACE("Found item %#"PRIx64", returning needed size %#"PRIx64".\n", -+ e->h.hash, e->h.value_size); -+ ret = VKD3D_OK; -+ goto done; -+ } -+ -+ if (size_in < e->h.value_size) -+ { -+ WARN("Output buffer is too small for item %#"PRIx64", got %#zx want %#"PRIx64".\n", -+ e->h.hash, size_in, e->h.value_size); -+ ret = VKD3D_ERROR_MORE_DATA; -+ goto done; -+ } -+ -+ memcpy(value, e->payload + e->h.key_size, e->h.value_size); -+ ret = VKD3D_OK; -+ TRACE("Returning cached item %#"PRIx64".\n", e->h.hash); -+ -+done: -+ vkd3d_shader_cache_unlock(cache); -+ return ret; -+} -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index c8cfea43cc1..cfc9c5f5ed3 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -1334,11 +1334,11 @@ static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_dev - - fragment_shader_interlock_features = &info->fragment_shader_interlock_features; - TRACE(" VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT:\n"); -- TRACE(" fragmentShaderSampleInterlock: %#x.\n.", -+ TRACE(" fragmentShaderSampleInterlock: %#x.\n", - fragment_shader_interlock_features->fragmentShaderSampleInterlock); -- TRACE(" fragmentShaderPixelInterlock: %#x\n.", -+ TRACE(" fragmentShaderPixelInterlock: %#x.\n", - fragment_shader_interlock_features->fragmentShaderPixelInterlock); -- TRACE(" fragmentShaderShadingRateInterlock: %#x\n.", -+ TRACE(" fragmentShaderShadingRateInterlock: %#x.\n", - fragment_shader_interlock_features->fragmentShaderShadingRateInterlock); - - demote_features = &info->demote_features; -@@ -2740,19 +2740,43 @@ static HRESULT STDMETHODCALLTYPE d3d12_cache_session_GetDevice(ID3D12ShaderCache - static HRESULT STDMETHODCALLTYPE d3d12_cache_session_FindValue(ID3D12ShaderCacheSession *iface, - const void *key, UINT key_size, void *value, UINT *value_size) - { -- FIXME("iface %p, key %p, key_size %#x, value %p, 
value_size %p stub!\n", -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ enum vkd3d_result ret; -+ size_t size; -+ -+ TRACE("iface %p, key %p, key_size %#x, value %p, value_size %p.\n", - iface, key, key_size, value, value_size); - -- return DXGI_ERROR_NOT_FOUND; -+ if (!value_size) -+ { -+ WARN("value_size is NULL, returning E_INVALIDARG.\n"); -+ return E_INVALIDARG; -+ } -+ -+ size = *value_size; -+ ret = vkd3d_shader_cache_get(session->cache, key, key_size, value, &size); -+ *value_size = size; -+ -+ return hresult_from_vkd3d_result(ret); - } - - static HRESULT STDMETHODCALLTYPE d3d12_cache_session_StoreValue(ID3D12ShaderCacheSession *iface, - const void *key, UINT key_size, const void *value, UINT value_size) - { -- FIXME("iface %p, key %p, key_size %#x, value %p, value_size %u stub!\n", iface, key, key_size, -- value, value_size); -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ enum vkd3d_result ret; - -- return E_NOTIMPL; -+ TRACE("iface %p, key %p, key_size %#x, value %p, value_size %u.\n", -+ iface, key, key_size, value, value_size); -+ -+ if (!key || !key_size || !value || !value_size) -+ { -+ WARN("Invalid input parameters, returning E_INVALIDARG.\n"); -+ return E_INVALIDARG; -+ } -+ -+ ret = vkd3d_shader_cache_put(session->cache, key, key_size, value, value_size); -+ return hresult_from_vkd3d_result(ret); - } - - static void STDMETHODCALLTYPE d3d12_cache_session_SetDeleteOnDestroy(ID3D12ShaderCacheSession *iface) -@@ -2888,7 +2912,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device9 *ifac - || IsEqualGUID(riid, &IID_ID3D12Object) - || IsEqualGUID(riid, &IID_IUnknown)) - { -- ID3D12Device_AddRef(iface); -+ ID3D12Device9_AddRef(iface); - *object = iface; - return S_OK; - } -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 179999148bc..7a2f464c98e 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -1271,7 +1271,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource2 * - || IsEqualGUID(riid, &IID_ID3D12Object) - || IsEqualGUID(riid, &IID_IUnknown)) - { -- ID3D12Resource_AddRef(iface); -+ ID3D12Resource2_AddRef(iface); - *object = iface; - return S_OK; - } -@@ -2350,16 +2350,16 @@ static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) - i = vkd3d_atomic_increment_u32(&cache->next_index) & HEAD_INDEX_MASK; - for (;;) - { -- if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) -+ if (vkd3d_atomic_compare_exchange_u32(&cache->heads[i].spinlock, 0, 1)) - { - if ((u.object = cache->heads[i].head)) - { - vkd3d_atomic_decrement_u32(&cache->free_count); - cache->heads[i].head = u.header->next; -- vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); - return u.object; - } -- vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); - } - /* Keeping a free count avoids uncertainty over when this loop should terminate, - * which could result in excess allocations gradually increasing without limit. 
*/ -@@ -2381,7 +2381,7 @@ static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, - i = vkd3d_atomic_increment_u32(&cache->next_index) & HEAD_INDEX_MASK; - for (;;) - { -- if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) -+ if (vkd3d_atomic_compare_exchange_u32(&cache->heads[i].spinlock, 0, 1)) - break; - i = (i + 1) & HEAD_INDEX_MASK; - } -@@ -2389,7 +2389,7 @@ static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, - head = cache->heads[i].head; - u.header->next = head; - cache->heads[i].head = u.object; -- vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); - vkd3d_atomic_increment_u32(&cache->free_count); - } - -@@ -2473,7 +2473,7 @@ void vkd3d_view_decref(void *view, struct d3d12_device *device) - - static inline void d3d12_desc_replace(struct d3d12_desc *dst, void *view, struct d3d12_device *device) - { -- if ((view = vkd3d_atomic_exchange_pointer(&dst->s.u.object, view))) -+ if ((view = vkd3d_atomic_exchange_ptr(&dst->s.u.object, view))) - vkd3d_view_decref(view, device); - } - -@@ -2652,7 +2652,7 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr - union d3d12_desc_object u; - unsigned int i, next; - -- if ((i = vkd3d_atomic_exchange(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) -+ if ((i = vkd3d_atomic_exchange_u32(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) - return; - - writes.null_vk_cbv_info.buffer = VK_NULL_HANDLE; -@@ -2667,7 +2667,7 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr - for (; i != UINT_MAX; i = next) - { - src = &descriptors[i]; -- next = vkd3d_atomic_exchange(&src->next, 0); -+ next = vkd3d_atomic_exchange_u32(&src->next, 0); - next = (int)next >> 1; - - /* A race exists here between updating src->next and getting the current object. The best -@@ -2695,13 +2695,13 @@ static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst, struct d3d12_des - head = descriptor_heap->dirty_list_head; - - /* Only one thread can swap the value away from zero. */ -- if (!vkd3d_atomic_compare_exchange(&dst->next, 0, (head << 1) | 1)) -+ if (!vkd3d_atomic_compare_exchange_u32(&dst->next, 0, (head << 1) | 1)) - return; - /* Now it is safe to modify 'next' to another nonzero value if necessary. 
*/ -- while (!vkd3d_atomic_compare_exchange(&descriptor_heap->dirty_list_head, head, i)) -+ while (!vkd3d_atomic_compare_exchange_u32(&descriptor_heap->dirty_list_head, head, i)) - { - head = descriptor_heap->dirty_list_head; -- vkd3d_atomic_exchange(&dst->next, (head << 1) | 1); -+ vkd3d_atomic_exchange_u32(&dst->next, (head << 1) | 1); - } - } - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -index 29305fbdc63..c7431bd821b 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -@@ -71,7 +71,7 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, - - if (!device) - { -- ID3D12Device_Release(&object->ID3D12Device9_iface); -+ ID3D12Device9_Release(&object->ID3D12Device9_iface); - return S_FALSE; - } - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index 5f60c8d90ad..d1fa866d9e3 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -19,6 +19,9 @@ - #ifndef __VKD3D_PRIVATE_H - #define __VKD3D_PRIVATE_H - -+#ifndef __MINGW32__ -+#define WIDL_C_INLINE_WRAPPERS -+#endif - #define COBJMACROS - #define NONAMELESSUNION - #define VK_NO_PROTOTYPES -@@ -194,93 +197,14 @@ struct vkd3d_instance - unsigned int refcount; - }; - --#ifdef _WIN32 -- --union vkd3d_thread_handle --{ -- void *handle; --}; -- --static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) --{ -- return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; --} -- --static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) --{ -- return InterlockedExchange((LONG volatile *)x, val); --} -- --static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) --{ -- return InterlockedCompareExchangePointer(x, xchg, cmp) == cmp; --} -- --static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) --{ -- return InterlockedExchangePointer(x, val); --} -- --#else /* _WIN32 */ -- --#include -- - union vkd3d_thread_handle - { -+#ifndef _WIN32 - pthread_t pthread; -+#endif - void *handle; - }; - --# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP --static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) --{ -- return __sync_bool_compare_and_swap(x, cmp, xchg); --} -- --static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) --{ -- return __sync_bool_compare_and_swap(x, cmp, xchg); --} --# else --# error "vkd3d_atomic_compare_exchange() not implemented for this platform" --# endif -- --# if HAVE_ATOMIC_EXCHANGE_N --static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) --{ -- return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); --} -- --static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) --{ -- return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); --} --# elif HAVE_SYNC_BOOL_COMPARE_AND_SWAP --static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) --{ -- unsigned int i; -- do -- { -- i = *x; -- } while (!__sync_bool_compare_and_swap(x, i, val)); -- return i; --} -- --static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) --{ -- void *p; -- do -- { -- p = *x; -- } while (!__sync_bool_compare_and_swap(x, p, val)); -- return p; --} --# else --# error "vkd3d_atomic_exchange() not 
implemented for this platform" --# endif -- --#endif /* _WIN32 */ -- - HRESULT vkd3d_create_thread(struct vkd3d_instance *instance, - PFN_vkd3d_thread thread_main, void *data, union vkd3d_thread_handle *thread); - HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_handle *thread); -@@ -742,7 +666,7 @@ static inline bool vkd3d_view_incref(void *desc) - if (refcount <= 0) - return false; - } -- while (!vkd3d_atomic_compare_exchange(&h->refcount, refcount, refcount + 1)); -+ while (!vkd3d_atomic_compare_exchange_u32(&h->refcount, refcount, refcount + 1)); - - return true; - } -@@ -1852,5 +1776,9 @@ struct vkd3d_shader_cache; - int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache); - unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache); - unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache); -+int vkd3d_shader_cache_put(struct vkd3d_shader_cache *cache, -+ const void *key, size_t key_size, const void *value, size_t value_size); -+int vkd3d_shader_cache_get(struct vkd3d_shader_cache *cache, -+ const void *key, size_t key_size, void *value, size_t *value_size); - - #endif /* __VKD3D_PRIVATE_H */ --- -2.43.0 - diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-f090d1e80d8b6617b71f25dd422665b4475.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-f090d1e80d8b6617b71f25dd422665b4475.patch deleted file mode 100644 index 45b6f0e7..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-f090d1e80d8b6617b71f25dd422665b4475.patch +++ /dev/null @@ -1,201 +0,0 @@ -From e0214af03df8d0b0212f99c79a961003ffc63ff6 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 24 May 2024 07:56:01 +1000 -Subject: [PATCH] Updated vkd3d to f090d1e80d8b6617b71f25dd422665b44759f3d0. - ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 11 ++++++----- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 10 ++++++++-- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 17 ++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 2 +- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 18 +++++++++++++++++- - 6 files changed, 49 insertions(+), 11 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 58f830dd887..bfd5b52b436 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -559,7 +559,8 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp - element = &signature->elements[signature->element_count++]; - - memset(element, 0, sizeof(*element)); -- element->semantic_name = name; -+ if (!(element->semantic_name = vkd3d_strdup(name))) -+ return false; - element->semantic_index = index; - element->sysval_semantic = sysval; - element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -@@ -2050,7 +2051,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. 
*/ - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.\n", -+ hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.", - debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type)); - break; - } -@@ -2458,7 +2459,7 @@ static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - - default: -- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); -+ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); - } - } - -@@ -2546,7 +2547,7 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - break; - - default: -- hlsl_fixme(ctx, &instr->loc, "Resource load type %u\n", load->load_type); -+ hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); - return; - } - -@@ -2578,7 +2579,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - - if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) - { -- hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks.\n"); -+ hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks."); - return; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index 8a1012d909b..4b9f67235aa 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -360,7 +360,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s - uint32_t count, header_size; - struct signature_element *e; - const char *ptr = data; -- unsigned int i; -+ unsigned int i, j; - - if (!require_space(0, 2, sizeof(uint32_t), section->data.size)) - { -@@ -403,6 +403,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s - for (i = 0; i < count; ++i) - { - size_t name_offset; -+ const char *name; - uint32_t mask; - - e[i].sort_index = i; -@@ -413,9 +414,14 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s - e[i].stream_index = 0; - - name_offset = read_u32(&ptr); -- if (!(e[i].semantic_name = shader_get_string(data, section->data.size, name_offset))) -+ if (!(name = shader_get_string(data, section->data.size, name_offset)) -+ || !(e[i].semantic_name = vkd3d_strdup(name))) - { - WARN("Invalid name offset %#zx (data size %#zx).\n", name_offset, section->data.size); -+ for (j = 0; j < i; ++j) -+ { -+ vkd3d_free((void *)e[j].semantic_name); -+ } - vkd3d_free(e); - return VKD3D_ERROR_INVALID_ARGUMENT; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 4943a586680..73a8d8687c5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -9445,7 +9445,22 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - } - } - -- vkd3d_free(s->elements); -+ for (i = 0; i < operand_count; ++i) -+ { -+ if ((elements[i].semantic_name = vkd3d_strdup(elements[i].semantic_name))) -+ continue; -+ -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Failed to allocate signature element semantic name."); -+ for (j = 0; j < i; ++j) -+ { -+ vkd3d_free((void *)elements[j].semantic_name); -+ } -+ vkd3d_free(elements); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ shader_signature_cleanup(s); - s->elements = elements; - s->element_count = operand_count; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 27f16af51c5..bdb72a1fab9 100644 ---- 
a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -2988,7 +2988,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - - if (cond->data_type->class > HLSL_CLASS_VECTOR || instr->data_type->class > HLSL_CLASS_VECTOR) - { -- hlsl_fixme(ctx, &instr->loc, "Lower ternary of type other than scalar or vector.\n"); -+ hlsl_fixme(ctx, &instr->loc, "Lower ternary of type other than scalar or vector."); - return false; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index 6fb61eff6c3..a3cdbe559a7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -66,7 +66,7 @@ static void update_location(struct preproc_ctx *ctx); - %s LINE - - NEWLINE \r?\n --WS [ \t] -+WS [ \t\r] - IDENTIFIER [A-Za-z_][A-Za-z0-9_]* - INT_SUFFIX [uUlL]{0,2} - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index b8dd0dba377..46c0da2a2d7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -657,7 +657,15 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig - struct vkd3d_shader_signature_element *d = &signature->elements[i]; - struct signature_element *e = &src->elements[i]; - -- d->semantic_name = e->semantic_name; -+ if (!(d->semantic_name = vkd3d_strdup(e->semantic_name))) -+ { -+ for (unsigned int j = 0; j < i; ++j) -+ { -+ vkd3d_free((void *)signature->elements[j].semantic_name); -+ } -+ vkd3d_free(signature->elements); -+ return false; -+ } - d->semantic_index = e->semantic_index; - d->stream_index = e->stream_index; - d->sysval_semantic = e->sysval_semantic; -@@ -1763,6 +1771,10 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu - - void shader_signature_cleanup(struct shader_signature *signature) - { -+ for (unsigned int i = 0; i < signature->element_count; ++i) -+ { -+ vkd3d_free((void *)signature->elements[i].semantic_name); -+ } - vkd3d_free(signature->elements); - signature->elements = NULL; - } -@@ -1820,6 +1832,10 @@ void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature - { - TRACE("signature %p.\n", signature); - -+ for (unsigned int i = 0; i < signature->element_count; ++i) -+ { -+ vkd3d_free((void *)signature->elements[i].semantic_name); -+ } - vkd3d_free(signature->elements); - signature->elements = NULL; - } --- -2.43.0 - diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-9693271dcfb96e9cd8d44c181cf70044edb.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-9693271dcfb96e9cd8d44c181cf70044edb.patch deleted file mode 100644 index b659d3dc..00000000 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-9693271dcfb96e9cd8d44c181cf70044edb.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 112fd3af5d768c38517cfd4462ae586cf40f13c2 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Tue, 28 May 2024 07:40:44 +1000 -Subject: [PATCH] Updated vkd3d to 9693271dcfb96e9cd8d44c181cf70044edbf6861. 
- ---- - libs/vkd3d/include/vkd3d_shader.h | 6 +++++- - libs/vkd3d/include/vkd3d_types.h | 2 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 1 + - libs/vkd3d/libs/vkd3d/resource.c | 6 ++++-- - 4 files changed, 11 insertions(+), 4 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 2b32b8a3e98..082f34179a3 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -2059,8 +2059,12 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported - * - VKD3D_SHADER_SOURCE_D3D_BYTECODE to VKD3D_SHADER_TARGET_SPIRV_TEXT - * (if vkd3d was compiled with SPIRV-Tools) - * - VKD3D_SHADER_SOURCE_D3D_BYTECODE to VKD3D_SHADER_TARGET_D3D_ASM -- * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_DXBC_TPF -+ * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_SPIRV_BINARY -+ * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_SPIRV_TEXT -+ * (if vkd3d was compiled with SPIRV-Tools) -+ * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_D3D_ASM - * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_D3D_BYTECODE -+ * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_DXBC_TPF - * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_FX - * - * Supported transformations can also be detected at runtime with the functions -diff --git a/libs/vkd3d/include/vkd3d_types.h b/libs/vkd3d/include/vkd3d_types.h -index dc5a7c064ae..f5a10117c12 100644 ---- a/libs/vkd3d/include/vkd3d_types.h -+++ b/libs/vkd3d/include/vkd3d_types.h -@@ -41,7 +41,7 @@ enum vkd3d_result - { - /** Success. */ - VKD3D_OK = 0, -- /** Success as a result of there being nothing to do. */ -+ /** Success as a result of there being nothing to do. \since 1.12 */ - VKD3D_FALSE = 1, - /** An unspecified failure occurred. */ - VKD3D_ERROR = -1, -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 37818d4dfad..b3b745fc1b2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -1886,6 +1886,7 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr - srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; - srcs[1].reg.idx_count = 2; - srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; -+ srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; - srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; - - srcs[2].reg.type = VKD3DSPR_SAMPLER; -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 7a2f464c98e..c897d9f2c5a 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -4284,12 +4284,14 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript - VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; - VkDescriptorSetAllocateInfo set_desc; - VkResult vr; -+ HRESULT hr; - - if (!device->vk_descriptor_heap_layouts[set].vk_set_layout) - { - /* Set 0 uses mutable descriptors, and this set is unused. 
*/ -- if (!descriptor_heap->vk_descriptor_sets[0].vk_set) -- d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, 0); -+ if (!descriptor_heap->vk_descriptor_sets[0].vk_set -+ && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, 0))) -+ return hr; - descriptor_set->vk_set = descriptor_heap->vk_descriptor_sets[0].vk_set; - descriptor_set->vk_type = device->vk_descriptor_heap_layouts[set].type; - return S_OK; --- -2.43.0 - diff --git a/staging/upstream-commit b/staging/upstream-commit index aa3bf229..56bd3786 100644 --- a/staging/upstream-commit +++ b/staging/upstream-commit @@ -1 +1 @@ -5f7b9a5b837a77a85754b3b55b974e39e199f817 +b6eada5e2f9e3c86c18df118cddd20e6212f64de