From a78e9f3998ea772e711fac6822655b4943da464c Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Sat, 18 May 2024 13:09:40 +1000 Subject: [PATCH] Updated vkd3d-latest patchset Squashed for release. --- ...9c83caeda652d7968c10e54cca2ae3b7fc1.patch} | 9455 ++++++++++++++--- ...-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch | 1024 -- ...-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch | 892 -- ...-13e1491941a1af32ddfc1019fa304231fd1.patch | 953 -- ...-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch | 1497 --- ...-4b3a948edcb5e83074b63aad25ecf450dca.patch | 3631 ------- ...-9e57039fce4040c8bfadaa73bf449c00591.patch | 937 -- ...-061dc390367b4c83022d5fe1255f8d38f6b.patch | 153 - 8 files changed, 8108 insertions(+), 10434 deletions(-) rename patches/vkd3d-latest/{0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch => 0001-Updated-vkd3d-to-9c83caeda652d7968c10e54cca2ae3b7fc1.patch} (65%) delete mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch delete mode 100644 patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch delete mode 100644 patches/vkd3d-latest/0004-Updated-vkd3d-to-13e1491941a1af32ddfc1019fa304231fd1.patch delete mode 100644 patches/vkd3d-latest/0005-Updated-vkd3d-to-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch delete mode 100644 patches/vkd3d-latest/0006-Updated-vkd3d-to-4b3a948edcb5e83074b63aad25ecf450dca.patch delete mode 100644 patches/vkd3d-latest/0007-Updated-vkd3d-to-9e57039fce4040c8bfadaa73bf449c00591.patch delete mode 100644 patches/vkd3d-latest/0008-Updated-vkd3d-to-061dc390367b4c83022d5fe1255f8d38f6b.patch diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-9c83caeda652d7968c10e54cca2ae3b7fc1.patch similarity index 65% rename from patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch rename to 
patches/vkd3d-latest/0001-Updated-vkd3d-to-9c83caeda652d7968c10e54cca2ae3b7fc1.patch index 152c2f01..17efdc20 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-9c83caeda652d7968c10e54cca2ae3b7fc1.patch @@ -1,43 +1,44 @@ -From ba798c39689ed7d2f3952a250825f0c3a0b4cf88 Mon Sep 17 00:00:00 2001 +From 1322caf7fdb70df029490d125cd7e1e244631a01 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 7 Mar 2024 10:40:41 +1100 -Subject: [PATCH] Updated vkd3d to 4a209efb6278586d412ceb0a7cbe21e6769a7367. +Subject: [PATCH] Updated vkd3d to 9c83caeda652d7968c10e54cca2ae3b7fc18f384. --- libs/vkd3d/Makefile.in | 1 + - libs/vkd3d/include/private/vkd3d_common.h | 189 +- + libs/vkd3d/include/private/vkd3d_common.h | 261 +- libs/vkd3d/include/private/vkd3d_memory.h | 11 +- + libs/vkd3d/include/vkd3d.h | 206 +- libs/vkd3d/include/vkd3d_shader.h | 69 +- libs/vkd3d/include/vkd3d_types.h | 2 + libs/vkd3d/libs/vkd3d-common/blob.c | 3 +- libs/vkd3d/libs/vkd3d-common/debug.c | 2 +- libs/vkd3d/libs/vkd3d-common/error.c | 1 - - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 1151 +++--- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 165 +- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 1190 +++--- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 290 +- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 39 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 1610 ++++++++- - libs/vkd3d/libs/vkd3d-shader/fx.c | 517 ++- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 108 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 495 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 120 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 2634 +++++++++++-- + libs/vkd3d/libs/vkd3d-shader/fx.c | 994 ++++- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 109 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 723 ++-- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 162 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 7 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 730 +++- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c 
| 535 ++- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 23 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 3172 +++++++++++++---- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 383 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 229 +- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 280 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 118 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 904 +++-- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 861 ++++- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 317 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 3271 +++++++++++++---- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 727 +++- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 421 +-- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 324 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 192 +- libs/vkd3d/libs/vkd3d/cache.c | 59 + - libs/vkd3d/libs/vkd3d/command.c | 20 + - libs/vkd3d/libs/vkd3d/device.c | 732 +++- + libs/vkd3d/libs/vkd3d/command.c | 41 +- + libs/vkd3d/libs/vkd3d/device.c | 1034 +++++- libs/vkd3d/libs/vkd3d/resource.c | 21 +- - libs/vkd3d/libs/vkd3d/state.c | 34 +- - libs/vkd3d/libs/vkd3d/utils.c | 10 + + libs/vkd3d/libs/vkd3d/state.c | 36 +- + libs/vkd3d/libs/vkd3d/utils.c | 34 + libs/vkd3d/libs/vkd3d/vkd3d_main.c | 22 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 92 +- - 33 files changed, 8450 insertions(+), 2500 deletions(-) + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 176 +- + 34 files changed, 11642 insertions(+), 3502 deletions(-) create mode 100644 libs/vkd3d/libs/vkd3d/cache.c diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in @@ -53,7 +54,7 @@ index 448e9a0e61d..94e4833dc9a 100644 libs/vkd3d/device.c \ libs/vkd3d/resource.c \ diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 6a3b530d868..f9df47d339c 100644 +index 6a3b530d868..b0e9230dab6 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -30,6 +30,9 @@ @@ -207,7 +208,16 @@ index 6a3b530d868..f9df47d339c 100644 return 
__builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; -@@ -305,6 +429,69 @@ static inline uint32_t vkd3d_atomic_increment_u32(uint32_t volatile *x) +@@ -216,6 +340,8 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) + return (x > y) - (x < y); + } + ++#define VKD3D_BITMAP_SIZE(x) (((x) + 0x1f) >> 5) ++ + static inline bool bitmap_clear(uint32_t *map, unsigned int idx) + { + return map[idx >> 5] &= ~(1u << (idx & 0x1f)); +@@ -305,6 +431,139 @@ static inline uint32_t vkd3d_atomic_increment_u32(uint32_t volatile *x) return vkd3d_atomic_add_fetch_u32(x, 1); } @@ -273,6 +283,76 @@ index 6a3b530d868..f9df47d339c 100644 + ERR("Failed to destroy the mutex, ret %d.\n", ret); +#endif +} ++ ++struct vkd3d_cond ++{ ++#ifdef _WIN32 ++ CONDITION_VARIABLE cond; ++#else ++ pthread_cond_t cond; ++#endif ++}; ++ ++static inline void vkd3d_cond_init(struct vkd3d_cond *cond) ++{ ++#ifdef _WIN32 ++ InitializeConditionVariable(&cond->cond); ++#else ++ int ret; ++ ++ if ((ret = pthread_cond_init(&cond->cond, NULL))) ++ ERR("Failed to initialise the condition variable, ret %d.\n", ret); ++#endif ++} ++ ++static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) ++{ ++#ifdef _WIN32 ++ WakeConditionVariable(&cond->cond); ++#else ++ int ret; ++ ++ if ((ret = pthread_cond_signal(&cond->cond))) ++ ERR("Failed to signal the condition variable, ret %d.\n", ret); ++#endif ++} ++ ++static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) ++{ ++#ifdef _WIN32 ++ WakeAllConditionVariable(&cond->cond); ++#else ++ int ret; ++ ++ if ((ret = pthread_cond_broadcast(&cond->cond))) ++ ERR("Failed to broadcast the condition variable, ret %d.\n", ret); ++#endif ++} ++ ++static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) ++{ ++#ifdef _WIN32 ++ if (!SleepConditionVariableCS(&cond->cond, &lock->lock, INFINITE)) ++ ERR("Failed to wait on the condition variable, error %lu.\n", GetLastError()); ++#else ++ int ret; ++ ++ if ((ret = 
pthread_cond_wait(&cond->cond, &lock->lock))) ++ ERR("Failed to wait on the condition variable, ret %d.\n", ret); ++#endif ++} ++ ++static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) ++{ ++#ifdef _WIN32 ++ /* Nothing to do. */ ++#else ++ int ret; ++ ++ if ((ret = pthread_cond_destroy(&cond->cond))) ++ ERR("Failed to destroy the condition variable, ret %d.\n", ret); ++#endif ++} + static inline void vkd3d_parse_version(const char *version, int *major, int *minor) { @@ -306,6 +386,324 @@ index 8a2edb1000d..682d35c03c6 100644 bool vkd3d_array_reserve(void **elements, size_t *capacity, size_t element_count, size_t element_size); #endif /* __VKD3D_MEMORY_H */ +diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h +index aa68b70e1bf..38249f0bf5c 100644 +--- a/libs/vkd3d/include/vkd3d.h ++++ b/libs/vkd3d/include/vkd3d.h +@@ -46,21 +46,37 @@ extern "C" { + * \since 1.0 + */ + ++/** The type of a chained structure. */ + enum vkd3d_structure_type + { +- /* 1.0 */ ++ /** The structure is a vkd3d_instance_create_info structure. */ + VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, ++ /** The structure is a vkd3d_device_create_info structure. */ + VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO, ++ /** The structure is a vkd3d_image_resource_create_info structure. */ + VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO, + +- /* 1.1 */ ++ /** ++ * The structure is a vkd3d_optional_instance_extensions_info structure. ++ * \since 1.1 ++ */ + VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO, + +- /* 1.2 */ ++ /** ++ * The structure is a vkd3d_optional_device_extensions_info structure. ++ * \since 1.2 ++ */ + VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO, ++ /** ++ * The structure is a vkd3d_application_info structure. ++ * \since 1.2 ++ */ + VKD3D_STRUCTURE_TYPE_APPLICATION_INFO, + +- /* 1.3 */ ++ /** ++ * The structure is a vkd3d_host_time_domain_info structure. 
++ * \since 1.3 ++ */ + VKD3D_STRUCTURE_TYPE_HOST_TIME_DOMAIN_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_STRUCTURE_TYPE), +@@ -93,98 +109,262 @@ typedef HRESULT (*PFN_vkd3d_join_thread)(void *thread); + + struct vkd3d_instance; + ++/** ++ * A chained structure containing instance creation parameters. ++ */ + struct vkd3d_instance_create_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** An pointer to a function to signal events. */ + PFN_vkd3d_signal_event pfn_signal_event; ++ /** ++ * An optional pointer to a function to create threads. If this is NULL vkd3d will use a ++ * function of its choice, depending on the platform. It must be NULL if and only if ++ * pfn_join_thread is NULL. ++ */ + PFN_vkd3d_create_thread pfn_create_thread; ++ /** ++ * An optional pointer to a function to join threads. If this is NULL vkd3d will use a ++ * function of its choice, depending on the platform. It must be NULL if and only if ++ * pfn_create_thread is NULL. ++ */ + PFN_vkd3d_join_thread pfn_join_thread; ++ /** The size of type WCHAR. It must be 2 or 4 and should normally be set to sizeof(WCHAR). */ + size_t wchar_size; + +- /* If set to NULL, libvkd3d loads libvulkan. */ ++ /** ++ * A pointer to the vkGetInstanceProcAddr Vulkan function, which will be used to load all the ++ * other Vulkan functions. If set to NULL, vkd3d will search and use the Vulkan loader. ++ */ + PFN_vkGetInstanceProcAddr pfn_vkGetInstanceProcAddr; + ++ /** ++ * A list of Vulkan instance extensions to request. They are intended as required, so instance ++ * creation will fail if any of them is not available. ++ */ + const char * const *instance_extensions; ++ /** The number of elements in the instance_extensions array. */ + uint32_t instance_extension_count; + }; + +-/* Extends vkd3d_instance_create_info. Available since 1.1. 
*/ ++/** ++ * A chained structure to specify optional instance extensions. ++ * ++ * This structure extends vkd3d_instance_create_info. ++ * ++ * \since 1.1 ++ */ + struct vkd3d_optional_instance_extensions_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** ++ * A list of optional Vulkan instance extensions to request. Instance creation does not fail if ++ * they are not available. ++ */ + const char * const *extensions; ++ /** The number of elements in the extensions array. */ + uint32_t extension_count; + }; + +-/* Extends vkd3d_instance_create_info. Available since 1.2. */ ++/** ++ * A chained structure to specify application information. ++ * ++ * This structure extends vkd3d_instance_create_info. ++ * ++ * \since 1.2 ++ */ + struct vkd3d_application_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_APPLICATION_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** ++ * The application's name, to be passed to the Vulkan implementation. If it is NULL, a name is ++ * computed from the process executable filename. If that cannot be done, the empty string is ++ * used. ++ */ + const char *application_name; ++ /** The application's version, to be passed to the Vulkan implementation. */ + uint32_t application_version; + +- const char *engine_name; /* "vkd3d" if NULL */ +- uint32_t engine_version; /* vkd3d version if engine_name is NULL */ +- ++ /** ++ * The engine name, to be passed to the Vulkan implementation. If it is NULL, "vkd3d" is used. ++ */ ++ const char *engine_name; ++ /** ++ * The engine version, to be passed to the Vulkan implementation. If it is 0, the version is ++ * computed from the vkd3d library version. 
++ */ ++ uint32_t engine_version; ++ ++ /** ++ * The vkd3d API version to use, to guarantee backward compatibility of the shared library. If ++ * this chained structure is not used then VKD3D_API_VERSION_1_0 is used. ++ */ + enum vkd3d_api_version api_version; + }; + +-/* Extends vkd3d_instance_create_info. Available since 1.3. */ ++/** ++ * A chained structure to specify the host time domain. ++ * ++ * This structure extends vkd3d_instance_create_info. ++ * ++ * \since 1.3 ++ */ + struct vkd3d_host_time_domain_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_HOST_TIME_DOMAIN_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** ++ * The number of clock ticks per second, used for GetClockCalibration(). It should normally ++ * match the expected result of QueryPerformanceFrequency(). If this chained structure is not ++ * used then 10 millions is used, which means that each tick is a tenth of microsecond, or ++ * equivalently 100 nanoseconds. ++ */ + uint64_t ticks_per_second; + }; + ++/** ++ * A chained structure containing device creation parameters. ++ */ + struct vkd3d_device_create_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** The minimum feature level to request. Device creation will fail with E_INVALIDARG if the ++ * Vulkan device doesn't have the features needed to fulfill the request. */ + D3D_FEATURE_LEVEL minimum_feature_level; + ++ /** ++ * The vkd3d instance to use to create a device. Either this or instance_create_info must be ++ * set. ++ */ + struct vkd3d_instance *instance; ++ /** ++ * The parameters used to create an instance, which is then used to create a device. Either ++ * this or instance must be set. 
++ */ + const struct vkd3d_instance_create_info *instance_create_info; + ++ /** ++ * The Vulkan physical device to use. If it is NULL, the first physical device found is used, ++ * prioritizing discrete GPUs over integrated GPUs and integrated GPUs over all the others. ++ * ++ * This parameter can be overridden by setting environment variable VKD3D_VULKAN_DEVICE. ++ */ + VkPhysicalDevice vk_physical_device; + ++ /** ++ * A list of Vulkan device extensions to request. They are intended as required, so device ++ * creation will fail if any of them is not available. ++ */ + const char * const *device_extensions; ++ /** The number of elements in the device_extensions array. */ + uint32_t device_extension_count; + ++ /** ++ * An object to be set as the device parent. This is not used by vkd3d except for being ++ * returned by vkd3d_get_device_parent. ++ */ + IUnknown *parent; ++ /** ++ * The adapter LUID to be set for the device. This is not used by vkd3d except for being ++ * returned by GetAdapterLuid. ++ */ + LUID adapter_luid; + }; + +-/* Extends vkd3d_device_create_info. Available since 1.2. */ ++/** ++ * A chained structure to specify optional device extensions. ++ * ++ * This structure extends vkd3d_device_create_info. ++ * ++ * \since 1.2 ++ */ + struct vkd3d_optional_device_extensions_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** ++ * A list of optional Vulkan device extensions to request. Device creation does not fail if ++ * they are not available. ++ */ + const char * const *extensions; ++ /** The number of elements in the extensions array. 
*/ + uint32_t extension_count; + }; + +-/* vkd3d_image_resource_create_info flags */ ++/** ++ * When specified as a flag of vkd3d_image_resource_create_info, it means that vkd3d will do the ++ * initial transition operation on the image from VK_IMAGE_LAYOUT_UNDEFINED to its appropriate ++ * Vulkan layout (depending on its D3D12 resource state). If this flag is not specified the caller ++ * is responsible for transitioning the Vulkan image to the appropriate layout. ++ */ + #define VKD3D_RESOURCE_INITIAL_STATE_TRANSITION 0x00000001 ++/** ++ * When specified as a flag of vkd3d_image_resource_create_info, it means that field present_state ++ * is honored. ++ */ + #define VKD3D_RESOURCE_PRESENT_STATE_TRANSITION 0x00000002 + ++/** ++ * A chained structure containing the parameters to create a D3D12 resource backed by a Vulkan ++ * image. ++ */ + struct vkd3d_image_resource_create_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** The Vulkan image that backs the resource. */ + VkImage vk_image; ++ /** The resource description. */ + D3D12_RESOURCE_DESC desc; ++ /** ++ * A combination of zero or more flags. The valid flags are ++ * VKD3D_RESOURCE_INITIAL_STATE_TRANSITION and VKD3D_RESOURCE_PRESENT_STATE_TRANSITION. ++ */ + unsigned int flags; ++ /** ++ * This field specifies how to handle resource state D3D12_RESOURCE_STATE_PRESENT for ++ * the resource. Notice that on D3D12 there is no difference between ++ * D3D12_RESOURCE_STATE_COMMON and D3D12_RESOURCE_STATE_PRESENT (they have the same value), ++ * while on Vulkan two different layouts are used (VK_IMAGE_LAYOUT_GENERAL and ++ * VK_IMAGE_LAYOUT_PRESENT_SRC_KHR). 
++ * ++ * * When flag VKD3D_RESOURCE_PRESENT_STATE_TRANSITION is not specified, field ++ * present_state is ignored and resource state D3D12_RESOURCE_STATE_COMMON/_PRESENT is ++ * mapped to VK_IMAGE_LAYOUT_GENERAL; this is useful for non-swapchain resources. ++ * * Otherwise, when present_state is D3D12_RESOURCE_STATE_PRESENT/_COMMON, resource state ++ * D3D12_RESOURCE_STATE_COMMON/_PRESENT is mapped to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; ++ * this is useful for swapchain resources that are directly backed by a Vulkan swapchain ++ * image. ++ * * Otherwise, resource state D3D12_RESOURCE_STATE_COMMON/_PRESENT is treated as resource ++ * state present_state; this is useful for swapchain resources that backed by a Vulkan ++ * non-swapchain image, which the client will likely consume with a copy or drawing ++ * operation at presentation time. ++ */ + D3D12_RESOURCE_STATES present_state; + }; + diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index 9e663919c38..2b32b8a3e98 100644 --- a/libs/vkd3d/include/vkd3d_shader.h @@ -489,7 +887,7 @@ index 3572669ac1c..b8350a5404c 100644 HRESULT hresult_from_vkd3d_result(int vkd3d_result) { diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 3f86bd45960..cd8ba0a7d2b 100644 +index 3f86bd45960..9abc2c4db70 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -250,6 +250,7 @@ static const char * const shader_opcode_names[] = @@ -500,7 +898,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 [VKD3DSIH_PHASE ] = "phase", [VKD3DSIH_PHI ] = "phi", [VKD3DSIH_POW ] = "pow", -@@ -321,6 +322,7 @@ static const char * const shader_opcode_names[] = +@@ -321,44 +322,34 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_UMAX ] = "umax", [VKD3DSIH_UMIN ] = "umin", [VKD3DSIH_UMUL ] = "umul", @@ -508,7 +906,27 @@ index 3f86bd45960..cd8ba0a7d2b 100644 [VKD3DSIH_USHR ] = "ushr", [VKD3DSIH_UTOD ] = "utod", [VKD3DSIH_UTOF ] = 
"utof", -@@ -328,37 +330,6 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_UTOU ] = "utou", ++ [VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL ] = "wave_active_all_equal", ++ [VKD3DSIH_WAVE_ACTIVE_BALLOT ] = "wave_active_ballot", ++ [VKD3DSIH_WAVE_ACTIVE_BIT_AND ] = "wave_active_bit_and", ++ [VKD3DSIH_WAVE_ACTIVE_BIT_OR ] = "wave_active_bit_or", ++ [VKD3DSIH_WAVE_ACTIVE_BIT_XOR ] = "wave_active_bit_xor", ++ [VKD3DSIH_WAVE_ALL_BIT_COUNT ] = "wave_all_bit_count", ++ [VKD3DSIH_WAVE_ALL_TRUE ] = "wave_all_true", ++ [VKD3DSIH_WAVE_ANY_TRUE ] = "wave_any_true", ++ [VKD3DSIH_WAVE_IS_FIRST_LANE ] = "wave_is_first_lane", ++ [VKD3DSIH_WAVE_OP_ADD ] = "wave_op_add", ++ [VKD3DSIH_WAVE_OP_IMAX ] = "wave_op_imax", ++ [VKD3DSIH_WAVE_OP_IMIN ] = "wave_op_imin", ++ [VKD3DSIH_WAVE_OP_MAX ] = "wave_op_max", ++ [VKD3DSIH_WAVE_OP_MIN ] = "wave_op_min", ++ [VKD3DSIH_WAVE_OP_MUL ] = "wave_op_mul", ++ [VKD3DSIH_WAVE_OP_UMAX ] = "wave_op_umax", ++ [VKD3DSIH_WAVE_OP_UMIN ] = "wave_op_umin", ++ [VKD3DSIH_WAVE_PREFIX_BIT_COUNT ] = "wave_prefix_bit_count", ++ [VKD3DSIH_WAVE_READ_LANE_AT ] = "wave_read_lane_at", ++ [VKD3DSIH_WAVE_READ_LANE_FIRST ] = "wave_read_lane_first", [VKD3DSIH_XOR ] = "xor", }; @@ -546,7 +964,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 struct vkd3d_d3d_asm_colours { const char *reset; -@@ -370,6 +341,7 @@ struct vkd3d_d3d_asm_colours +@@ -370,6 +361,7 @@ struct vkd3d_d3d_asm_colours const char *swizzle; const char *version; const char *write_mask; @@ -554,7 +972,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 }; struct vkd3d_d3d_asm_compiler -@@ -377,22 +349,10 @@ struct vkd3d_d3d_asm_compiler +@@ -377,22 +369,10 @@ struct vkd3d_d3d_asm_compiler struct vkd3d_string_buffer buffer; struct vkd3d_shader_version shader_version; struct vkd3d_d3d_asm_colours colours; @@ -578,7 +996,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 /* Convert floating point offset relative to a register file to an absolute * offset for float constants. 
*/ static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type register_type, UINT register_idx) -@@ -445,6 +405,23 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, +@@ -445,6 +425,23 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags); } @@ -602,7 +1020,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags) { if (sync_flags & VKD3DSSF_GLOBAL_UAV) -@@ -511,96 +488,138 @@ static void shader_dump_uav_flags(struct vkd3d_d3d_asm_compiler *compiler, uint3 +@@ -511,96 +508,138 @@ static void shader_dump_uav_flags(struct vkd3d_d3d_asm_compiler *compiler, uint3 vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", uav_flags); } @@ -778,7 +1196,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 } static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, enum vkd3d_shader_resource_type type) -@@ -646,6 +665,8 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum +@@ -646,6 +685,8 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum [VKD3D_DATA_UINT8 ] = "uint8", [VKD3D_DATA_UINT64 ] = "uint64", [VKD3D_DATA_BOOL ] = "bool", @@ -787,7 +1205,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 }; const char *name; -@@ -673,128 +694,133 @@ static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compil +@@ -673,128 +714,133 @@ static void shader_dump_resource_data_type(struct vkd3d_d3d_asm_compiler *compil vkd3d_string_buffer_printf(&compiler->buffer, ")"); } @@ -874,7 +1292,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 - case VKD3D_DECL_USAGE_BLEND_INDICES: - shader_addline(buffer, "blend"); - break; -- + - case VKD3D_DECL_USAGE_BLEND_WEIGHT: - shader_addline(buffer, "weight"); - break; @@ -917,7 +1335,7 @@ index 
3f86bd45960..cd8ba0a7d2b 100644 - case VKD3D_DECL_USAGE_FOG: - shader_addline(buffer, "fog"); - break; - +- - case VKD3D_DECL_USAGE_DEPTH: - shader_addline(buffer, "depth"); - break; @@ -1009,7 +1427,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 static void shader_print_float_literal(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, float f, const char *suffix) -@@ -891,13 +917,9 @@ static void shader_print_untyped_literal(struct vkd3d_d3d_asm_compiler *compiler +@@ -891,13 +937,9 @@ static void shader_print_untyped_literal(struct vkd3d_d3d_asm_compiler *compiler static void shader_print_subscript(struct vkd3d_d3d_asm_compiler *compiler, unsigned int offset, const struct vkd3d_shader_src_param *rel_addr) { @@ -1025,7 +1443,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 } static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler, -@@ -910,8 +932,8 @@ static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler +@@ -910,8 +952,8 @@ static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler vkd3d_string_buffer_printf(&compiler->buffer, "*]"); } @@ -1036,7 +1454,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 { struct vkd3d_string_buffer *buffer = &compiler->buffer; unsigned int offset = reg->idx[0].offset; -@@ -920,22 +942,23 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const +@@ -920,22 +962,23 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"}; static const char * const misctype_reg_names[] = {"vPos", "vFace"}; @@ -1064,7 +1482,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 offset = shader_get_float_offset(reg->type, offset); break; -@@ -945,205 +968,202 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const +@@ -945,205 +988,210 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const break; case VKD3DSPR_RASTOUT: @@ 
-1311,6 +1729,14 @@ index 3f86bd45960..cd8ba0a7d2b 100644 case VKD3DSPR_SSA: - shader_addline(buffer, "sr"); + vkd3d_string_buffer_printf(buffer, "sr"); ++ break; ++ ++ case VKD3DSPR_WAVELANECOUNT: ++ vkd3d_string_buffer_printf(buffer, "vWaveLaneCount"); ++ break; ++ ++ case VKD3DSPR_WAVELANEINDEX: ++ vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); break; default: @@ -1320,7 +1746,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 break; } -@@ -1162,7 +1182,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const +@@ -1162,7 +1210,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const break; } @@ -1329,7 +1755,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 switch (reg->dimension) { case VSIR_DIMENSION_SCALAR: -@@ -1183,7 +1203,8 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const +@@ -1183,7 +1231,8 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); break; default: @@ -1339,7 +1765,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 break; } break; -@@ -1222,20 +1243,22 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const +@@ -1222,20 +1271,22 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[3], ""); break; default: @@ -1366,7 +1792,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 /* A double2 vector is treated as a float4 vector in enum vsir_dimension. 
*/ if (reg->dimension == VSIR_DIMENSION_SCALAR || reg->dimension == VSIR_DIMENSION_VEC4) { -@@ -1253,14 +1276,16 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const +@@ -1253,14 +1304,16 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const } else { @@ -1386,7 +1812,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 } else if (reg->type != VKD3DSPR_RASTOUT && reg->type != VKD3DSPR_MISCTYPE -@@ -1304,7 +1329,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const +@@ -1304,7 +1357,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const } else { @@ -1395,7 +1821,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 } if (reg->type == VKD3DSPR_FUNCTIONPOINTER) -@@ -1312,8 +1337,9 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const +@@ -1312,8 +1365,9 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const } else { @@ -1406,7 +1832,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 } static void shader_print_precision(struct vkd3d_d3d_asm_compiler *compiler, const struct vkd3d_shader_register *reg) -@@ -1357,8 +1383,8 @@ static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, co +@@ -1357,8 +1411,8 @@ static void shader_print_non_uniform(struct vkd3d_d3d_asm_compiler *compiler, co compiler->colours.modifier, compiler->colours.reset); } @@ -1417,7 +1843,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 { static const char *dimensions[] = { -@@ -1370,7 +1396,13 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1370,7 +1424,13 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, struct vkd3d_string_buffer *buffer = &compiler->buffer; const char *dimension; @@ -1432,7 +1858,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 return; if (reg->dimension < ARRAY_SIZE(dimensions)) -@@ -1378,83 +1410,114 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler 
*compiler, +@@ -1378,83 +1438,114 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, else dimension = "??"; @@ -1587,7 +2013,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 } if (param->reg.type != VKD3DSPR_IMMCONST && param->reg.type != VKD3DSPR_IMMCONST64 -@@ -1472,26 +1535,21 @@ static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1472,26 +1563,21 @@ static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, swizzle_z = vsir_swizzle_get_component(swizzle, 2); swizzle_w = vsir_swizzle_get_component(swizzle, 3); @@ -1621,7 +2047,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 } static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, -@@ -1502,105 +1560,129 @@ static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1502,105 +1588,129 @@ static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, switch (dst->shift) { @@ -1792,7 +2218,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 } const char *shader_get_type_prefix(enum vkd3d_shader_type type) -@@ -1654,9 +1736,15 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile +@@ -1654,9 +1764,15 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile case VKD3DSIH_RETP: switch (ins->flags) { @@ -1811,7 +2237,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 } break; -@@ -1664,49 +1752,88 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile +@@ -1664,49 +1780,99 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile case VKD3DSIH_BREAKC: switch (ins->flags) { @@ -1905,6 +2331,17 @@ index 3f86bd45960..cd8ba0a7d2b 100644 if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) - shader_addline(buffer, "p"); + vkd3d_string_buffer_printf(buffer, "p"); ++ break; ++ ++ case VKD3DSIH_WAVE_OP_ADD: ++ case VKD3DSIH_WAVE_OP_IMAX: ++ case VKD3DSIH_WAVE_OP_IMIN: 
++ case VKD3DSIH_WAVE_OP_MAX: ++ case VKD3DSIH_WAVE_OP_MIN: ++ case VKD3DSIH_WAVE_OP_MUL: ++ case VKD3DSIH_WAVE_OP_UMAX: ++ case VKD3DSIH_WAVE_OP_UMIN: ++ vkd3d_string_buffer_printf(&compiler->buffer, (ins->flags & VKD3DSI_WAVE_PREFIX) ? "_prefix" : "_active"); break; case VKD3DSIH_ISHL: @@ -1916,7 +2353,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 /* fall through */ default: shader_dump_precise_flags(compiler, ins->flags); -@@ -1753,7 +1880,7 @@ static void shader_dump_icb(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1753,7 +1919,7 @@ static void shader_dump_icb(struct vkd3d_d3d_asm_compiler *compiler, shader_print_hex_literal(compiler, ", ", icb->data[4 * i + 3], "},\n"); } } @@ -1925,7 +2362,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 } static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, -@@ -1765,11 +1892,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1765,11 +1931,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, compiler->current = ins; if (ins->predicate) @@ -1938,7 +2375,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 /* PixWin marks instructions with the coissue flag with a '+' */ if (ins->coissue) -@@ -1782,21 +1905,20 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1782,21 +1944,20 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, case VKD3DSIH_DCL: case VKD3DSIH_DCL_UAV_TYPED: vkd3d_string_buffer_printf(buffer, "%s", compiler->colours.opcode); @@ -1965,7 +2402,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 ins->flags & VKD3DSI_INDEXED_DYNAMIC ? 
"dynamicIndexed" : "immediateIndexed"); shader_dump_register_space(compiler, ins->declaration.cb.range.space); break; -@@ -1823,8 +1945,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1823,8 +1984,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3DSIH_DCL_INDEX_RANGE: @@ -1975,7 +2412,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 shader_print_uint_literal(compiler, " ", ins->declaration.index_range.register_count, ""); break; -@@ -1840,41 +1961,32 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1840,41 +2000,32 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3DSIH_DCL_INPUT_PS: @@ -2026,7 +2463,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 break; case VKD3DSIH_DCL_INTERFACE: -@@ -1885,23 +1997,19 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1885,23 +2036,19 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3DSIH_DCL_RESOURCE_RAW: @@ -2054,7 +2491,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 shader_dump_register_space(compiler, ins->declaration.sampler.range.space); break; -@@ -1916,29 +2024,24 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1916,29 +2063,24 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: @@ -2089,7 +2526,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.byte_stride, ""); shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.structure_count, ""); break; -@@ -1951,15 +2054,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1951,15 +2093,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, case VKD3DSIH_DCL_UAV_RAW: 
shader_dump_uav_flags(compiler, ins->flags); @@ -2107,7 +2544,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); break; -@@ -1994,7 +2095,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, +@@ -1994,7 +2134,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, if (ins->resource_type != VKD3D_SHADER_RESOURCE_NONE) { @@ -2116,7 +2553,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 if (ins->raw) vkd3d_string_buffer_printf(buffer, "raw_"); if (ins->structured) -@@ -2002,7 +2103,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, +@@ -2002,7 +2142,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, shader_dump_resource_type(compiler, ins->resource_type); if (ins->resource_stride) shader_print_uint_literal(compiler, ", stride=", ins->resource_stride, ""); @@ -2125,7 +2562,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 } if (vkd3d_shader_instruction_has_texel_offset(ins)) -@@ -2021,37 +2122,200 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, +@@ -2021,37 +2161,200 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, for (i = 0; i < ins->dst_count; ++i) { shader_dump_ins_modifiers(compiler, &ins->dst[i]); @@ -2192,9 +2629,8 @@ index 3f86bd45960..cd8ba0a7d2b 100644 + case VKD3D_SHADER_COMPONENT_UINT64: return "uint64"; + default: return "??"; + } - } - --enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, ++} ++ +static const char *get_minimum_precision_name(enum vkd3d_shader_minimum_precision prec) +{ + switch (prec) @@ -2221,8 +2657,9 @@ index 3f86bd45960..cd8ba0a7d2b 100644 + case VKD3D_SHADER_SV_STENCIL_REF: return "oStencilRef"; + default: return "??"; + } -+} -+ + } + +-enum vkd3d_result 
vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, + const char *name, const char *register_name, const struct shader_signature *signature) +{ @@ -2335,7 +2772,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 static const struct vkd3d_d3d_asm_colours no_colours = { -@@ -2064,6 +2328,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +@@ -2064,6 +2367,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, .swizzle = "", .version = "", .write_mask = "", @@ -2343,7 +2780,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 }; static const struct vkd3d_d3d_asm_colours colours = { -@@ -2076,6 +2341,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +@@ -2076,6 +2380,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, .swizzle = "\x1b[93m", .version = "\x1b[36m", .write_mask = "\x1b[93m", @@ -2351,7 +2788,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 }; formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT -@@ -2109,6 +2375,17 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +@@ -2109,6 +2414,17 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, shader_get_type_prefix(shader_version->type), shader_version->major, shader_version->minor, compiler.colours.reset); @@ -2369,7 +2806,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 indent = 0; for (i = 0; i < program->instructions.count; ++i) { -@@ -2124,6 +2401,14 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +@@ -2124,6 +2440,14 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, --indent; break; @@ -2384,7 +2821,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 default: break; } -@@ -2142,6 +2427,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +@@ -2142,6 +2466,7 
@@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, case VKD3DSIH_IFC: case VKD3DSIH_LOOP: case VKD3DSIH_SWITCH: @@ -2392,7 +2829,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 ++indent; break; -@@ -2150,18 +2436,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +@@ -2150,18 +2475,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, } } @@ -2412,7 +2849,7 @@ index 3f86bd45960..cd8ba0a7d2b 100644 return result; } -@@ -2171,7 +2446,7 @@ void vkd3d_shader_trace(const struct vsir_program *program) +@@ -2171,7 +2485,7 @@ void vkd3d_shader_trace(const struct vsir_program *program) const char *p, *q, *end; struct vkd3d_shader_code code; @@ -2422,42 +2859,206 @@ index 3f86bd45960..cd8ba0a7d2b 100644 end = (const char *)code.code + code.size; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 27f5c810436..ace7694a59e 100644 +index 27f5c810436..cda73d48fc0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -541,9 +541,9 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp +@@ -215,8 +215,12 @@ struct vkd3d_shader_sm1_parser + + struct vkd3d_shader_parser p; + ++ struct ++ { + #define MAX_CONSTANT_COUNT 8192 +- uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32]; ++ uint32_t def_mask[VKD3D_BITMAP_SIZE(MAX_CONSTANT_COUNT)]; ++ uint32_t count; ++ } constants[3]; + }; + + /* This table is not order or position dependent. 
*/ +@@ -392,11 +396,6 @@ static const enum vkd3d_shader_resource_type resource_type_table[] = + /* VKD3D_SM1_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, + }; + +-static struct vkd3d_shader_sm1_parser *vkd3d_shader_sm1_parser(struct vkd3d_shader_parser *parser) +-{ +- return CONTAINING_RECORD(parser, struct vkd3d_shader_sm1_parser, p); +-} +- + static uint32_t read_u32(const uint32_t **ptr) + { + return *(*ptr)++; +@@ -414,7 +413,7 @@ static bool has_relative_address(uint32_t param) + static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info( + const struct vkd3d_shader_sm1_parser *sm1, enum vkd3d_sm1_opcode opcode) + { +- const struct vkd3d_shader_version *version = &sm1->p.program.shader_version; ++ const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; + const struct vkd3d_sm1_opcode_info *info; + unsigned int i = 0; + +@@ -537,13 +536,14 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp + const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, + unsigned int register_index, bool is_dcl, unsigned int mask) + { ++ struct vsir_program *program = sm1->p.program; + struct shader_signature *signature; struct signature_element *element; if (output) - signature = &sm1->p.shader_desc.output_signature; -+ signature = &sm1->p.program.output_signature; ++ signature = &program->output_signature; else - signature = &sm1->p.shader_desc.input_signature; -+ signature = &sm1->p.program.input_signature; ++ signature = &program->input_signature; if ((element = find_signature_element(signature, name, index))) { -@@ -581,9 +581,9 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, +@@ -568,7 +568,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp + element->register_count = 1; + element->mask = mask; + element->used_mask = is_dcl ? 
0 : mask; +- if (sm1->p.program.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) + element->interpolation_mode = VKD3DSIM_LINEAR; + + return true; +@@ -577,13 +577,14 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp + static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, + unsigned int register_index, unsigned int mask) + { ++ struct vsir_program *program = sm1->p.program; + struct shader_signature *signature; struct signature_element *element; if (output) - signature = &sm1->p.shader_desc.output_signature; -+ signature = &sm1->p.program.output_signature; ++ signature = &program->output_signature; else - signature = &sm1->p.shader_desc.input_signature; -+ signature = &sm1->p.program.input_signature; ++ signature = &program->input_signature; if (!(element = find_signature_element_by_register_index(signature, register_index))) { -@@ -886,7 +886,6 @@ static void shader_sm1_destroy(struct vkd3d_shader_parser *parser) - struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); +@@ -598,7 +599,7 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, + static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) + { +- const struct vkd3d_shader_version *version = &sm1->p.program.shader_version; ++ const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; + unsigned int register_index = reg->idx[0].offset; - vsir_program_cleanup(&parser->program); -- free_shader_desc(&sm1->p.shader_desc); - vkd3d_free(sm1); + switch (reg->type) +@@ -701,7 +702,7 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * + static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_semantic *semantic) + { +- 
const struct vkd3d_shader_version *version = &sm1->p.program.shader_version; ++ const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; + const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; + enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; + unsigned int mask = semantic->resource.reg.write_mask; +@@ -750,22 +751,20 @@ static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser * + static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, + enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) + { +- struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; +- +- desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); ++ sm1->constants[set].count = max(sm1->constants[set].count, index + 1); + if (from_def) + { + /* d3d shaders have a maximum of 8192 constants; we should not overrun + * this array. */ +- assert((index / 32) <= ARRAY_SIZE(sm1->constant_def_mask[set])); +- bitmap_set(sm1->constant_def_mask[set], index); ++ assert((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); ++ bitmap_set(sm1->constants[set].def_mask, index); + } } -@@ -1237,7 +1236,6 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, + static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) + { +- struct vsir_program *program = &sm1->p.program; ++ struct vsir_program *program = sm1->p.program; + uint32_t register_index = reg->idx[0].offset; + + switch (reg->type) +@@ -826,7 +825,7 @@ static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, + * VS >= 2.0 have relative addressing (with token) + * VS >= 1.0 < 2.0 have relative addressing (without token) + * The version check below should work in general. 
*/ +- if (sm1->p.program.shader_version.major < 2) ++ if (sm1->p.program->shader_version.major < 2) + { + *addr_token = (1u << 31) + | ((VKD3DSPR_ADDR << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2) +@@ -855,7 +854,7 @@ static void shader_sm1_skip_opcode(const struct vkd3d_shader_sm1_parser *sm1, co + /* Version 2.0+ shaders may contain address tokens, but fortunately they + * have a useful length mask - use it here. Version 1.x shaders contain no + * such tokens. */ +- if (sm1->p.program.shader_version.major >= 2) ++ if (sm1->p.program->shader_version.major >= 2) + { + length = (opcode_token & VKD3D_SM1_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; + *ptr += length; +@@ -881,15 +880,6 @@ static void shader_sm1_skip_opcode(const struct vkd3d_shader_sm1_parser *sm1, co + *ptr += (opcode_info->dst_count + opcode_info->src_count); + } + +-static void shader_sm1_destroy(struct vkd3d_shader_parser *parser) +-{ +- struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); +- +- vsir_program_cleanup(&parser->program); +- free_shader_desc(&sm1->p.shader_desc); +- vkd3d_free(sm1); +-} +- + static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, + struct vkd3d_shader_src_param *src_param) + { +@@ -899,7 +889,7 @@ static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const + shader_sm1_read_param(sm1, ptr, &token, &addr_token); + if (has_relative_address(token)) + { +- if (!(src_rel_addr = vsir_program_get_src_params(&sm1->p.program, 1))) ++ if (!(src_rel_addr = vsir_program_get_src_params(sm1->p.program, 1))) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, + "Out of memory."); +@@ -920,7 +910,7 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const + shader_sm1_read_param(sm1, ptr, &token, &addr_token); + if (has_relative_address(token)) + { +- if (!(dst_rel_addr = 
vsir_program_get_src_params(&sm1->p.program, 1))) ++ if (!(dst_rel_addr = vsir_program_get_src_params(sm1->p.program, 1))) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, + "Out of memory."); +@@ -1089,7 +1079,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str + { + struct vkd3d_shader_src_param *src_params, *predicate; + const struct vkd3d_sm1_opcode_info *opcode_info; +- struct vsir_program *program = &sm1->p.program; ++ struct vsir_program *program = sm1->p.program; + struct vkd3d_shader_dst_param *dst_param; + const uint32_t **ptr = &sm1->ptr; + uint32_t opcode_token; +@@ -1226,18 +1216,12 @@ static bool shader_sm1_is_end(struct vkd3d_shader_sm1_parser *sm1) + return false; + } + +-const struct vkd3d_shader_parser_ops shader_sm1_parser_ops = +-{ +- .parser_destroy = shader_sm1_destroy, +-}; +- +-static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, ++static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) + { const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; const uint32_t *code = compile_info->source.code; size_t code_size = compile_info->source.size; @@ -2465,26 +3066,119 @@ index 27f5c810436..ace7694a59e 100644 struct vkd3d_shader_version version; uint16_t shader_type; size_t token_count; -@@ -1290,9 +1288,6 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, - if (!vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name, &version, &shader_sm1_parser_ops, - code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) +@@ -1287,12 +1271,10 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, + sm1->end = &code[token_count]; + + /* Estimate instruction count to avoid reallocation in most shaders. 
*/ +- if (!vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name, &version, &shader_sm1_parser_ops, +- code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) ++ if (!vsir_program_init(program, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) return VKD3D_ERROR_OUT_OF_MEMORY; - shader_desc = &sm1->p.shader_desc; - shader_desc->byte_code = code; - shader_desc->byte_code_size = code_size; ++ ++ vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); sm1->ptr = sm1->start; return VKD3D_OK; -@@ -1363,7 +1358,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi - sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); +@@ -1306,77 +1288,68 @@ static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, + /* Find the highest constant index which is not written by a DEF + * instruction. We can't (easily) use an FFZ function for this since it + * needs to be limited by the highest used register index. 
*/ +- for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) ++ for (j = sm1->constants[set].count; j > 0; --j) + { +- if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) ++ if (!bitmap_is_set(sm1->constants[set].def_mask, j - 1)) + return j; + } - if (!sm1->p.failed) + return 0; + } + +-int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) ++int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, ++ struct vkd3d_shader_message_context *message_context, struct vsir_program *program) + { + struct vkd3d_shader_instruction_array *instructions; ++ struct vkd3d_shader_sm1_parser sm1 = {0}; + struct vkd3d_shader_instruction *ins; +- struct vkd3d_shader_sm1_parser *sm1; + unsigned int i; + int ret; + +- if (!(sm1 = vkd3d_calloc(1, sizeof(*sm1)))) +- { +- ERR("Failed to allocate parser.\n"); +- return VKD3D_ERROR_OUT_OF_MEMORY; +- } +- +- if ((ret = shader_sm1_init(sm1, compile_info, message_context)) < 0) ++ if ((ret = shader_sm1_init(&sm1, program, compile_info, message_context)) < 0) + { + WARN("Failed to initialise shader parser, ret %d.\n", ret); +- vkd3d_free(sm1); + return ret; + } + +- instructions = &sm1->p.program.instructions; +- while (!shader_sm1_is_end(sm1)) ++ instructions = &program->instructions; ++ while (!shader_sm1_is_end(&sm1)) + { + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) + { + ERR("Failed to allocate instructions.\n"); +- vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); +- shader_sm1_destroy(&sm1->p); ++ vkd3d_shader_parser_error(&sm1.p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); ++ vsir_program_cleanup(program); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ins = &instructions->elements[instructions->count]; +- shader_sm1_read_instruction(sm1, ins); ++ 
shader_sm1_read_instruction(&sm1, ins); + + if (ins->handler_idx == VKD3DSIH_INVALID) + { + WARN("Encountered unrecognized or invalid instruction.\n"); +- shader_sm1_destroy(&sm1->p); ++ vsir_program_cleanup(program); + return VKD3D_ERROR_INVALID_SHADER; + } + ++instructions->count; + } + +- for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) +- sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); ++ for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) ++ program->flat_constant_count[i] = get_external_constant_count(&sm1, i); + +- if (!sm1->p.failed) - ret = vsir_validate(&sm1->p); -+ ret = vkd3d_shader_parser_validate(&sm1->p); ++ if (!sm1.p.failed) ++ ret = vkd3d_shader_parser_validate(&sm1.p, config_flags); - if (sm1->p.failed && ret >= 0) +- if (sm1->p.failed && ret >= 0) ++ if (sm1.p.failed && ret >= 0) ret = VKD3D_ERROR_INVALID_SHADER; -@@ -1499,47 +1494,68 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns + + if (ret < 0) + { + WARN("Failed to parse shader.\n"); +- shader_sm1_destroy(&sm1->p); ++ vsir_program_cleanup(program); + return ret; + } + +- *parser = &sm1->p; +- + return ret; + } + +@@ -1499,47 +1472,74 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns return D3DPS_VERSION(major, minor); } @@ -2513,11 +3207,17 @@ index 27f5c810436..ace7694a59e 100644 - default: - ERR("Invalid class %#x.\n", type->class); - vkd3d_unreachable(); -+ case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_VERTEX_SHADER: + return D3DXPC_OBJECT; ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: ++ case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: + break; @@ -2546,7 +3246,7 @@ index 27f5c810436..ace7694a59e 100644 + case 
HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: -+ switch (type->base_type) ++ switch (type->e.numeric.type) + { + case HLSL_TYPE_BOOL: + return D3DXPT_BOOL; @@ -2573,7 +3273,7 @@ index 27f5c810436..ace7694a59e 100644 switch (type->sampler_dim) { case HLSL_SAMPLER_DIM_1D: -@@ -1557,9 +1573,8 @@ static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) +@@ -1557,9 +1557,8 @@ static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) vkd3d_unreachable(); } break; @@ -2585,37 +3285,36 @@ index 27f5c810436..ace7694a59e 100644 switch (type->sampler_dim) { case HLSL_SAMPLER_DIM_1D: -@@ -1577,13 +1592,34 @@ static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) +@@ -1577,13 +1576,33 @@ static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) vkd3d_unreachable(); } break; - case HLSL_TYPE_VERTEXSHADER: - return D3DXPT_VERTEXSHADER; - case HLSL_TYPE_VOID: -- return D3DXPT_VOID; -- default: -+ -+ case HLSL_CLASS_OBJECT: -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_PIXELSHADER: -+ return D3DXPT_PIXELSHADER; -+ case HLSL_TYPE_VERTEXSHADER: -+ return D3DXPT_VERTEXSHADER; -+ default: -+ vkd3d_unreachable(); -+ } - vkd3d_unreachable(); + + case HLSL_CLASS_ARRAY: + return hlsl_sm1_base_type(type->e.array.type); + + case HLSL_CLASS_STRUCT: -+ return D3DXPT_VOID; + return D3DXPT_VOID; +- default: +- vkd3d_unreachable(); + + case HLSL_CLASS_STRING: + return D3DXPT_STRING; + ++ case HLSL_CLASS_PIXEL_SHADER: ++ return D3DXPT_PIXELSHADER; ++ ++ case HLSL_CLASS_VERTEX_SHADER: ++ return D3DXPT_VERTEXSHADER; ++ ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: ++ case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: + break; @@ -2625,7 +3324,7 @@ index 27f5c810436..ace7694a59e 100644 } static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) -@@ 
-1620,7 +1656,7 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_typ +@@ -1620,7 +1639,7 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_typ } } @@ -2634,7 +3333,52 @@ index 27f5c810436..ace7694a59e 100644 put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); put_u32(buffer, vkd3d_make_u32(array_size, field_count)); put_u32(buffer, fields_offset); -@@ -1977,16 +2013,13 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b +@@ -1670,7 +1689,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { +- if (var->semantic.name || !var->regs[r].allocated) ++ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + ++uniform_count; +@@ -1708,14 +1727,14 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { +- if (var->semantic.name || !var->regs[r].allocated) ++ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + put_u32(buffer, 0); /* name */ + if (r == HLSL_REGSET_NUMERIC) + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); +- put_u32(buffer, var->data_type->reg_size[r] / 4); ++ put_u32(buffer, var->bind_count[r]); + } + else + { +@@ -1737,7 +1756,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + { + size_t var_offset, name_offset; + +- if (var->semantic.name || !var->regs[r].allocated) ++ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); +@@ -1969,24 +1988,21 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + /* Narrowing casts were already lowered. 
*/ + assert(src_type->dimx == dst_type->dimx); + +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- switch (src_type->base_type) ++ switch (src_type->e.numeric.type) { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: @@ -2653,7 +3397,12 @@ index 27f5c810436..ace7694a59e 100644 case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float."); break; -@@ -2002,7 +2035,10 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b +@@ -1998,11 +2014,14 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- switch(src_type->base_type) ++ switch(src_type->e.numeric.type) { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: @@ -2665,7 +3414,7 @@ index 27f5c810436..ace7694a59e 100644 case HLSL_TYPE_INT: case HLSL_TYPE_UINT: write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -@@ -2067,6 +2103,9 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b +@@ -2067,6 +2086,9 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b D3DDECLUSAGE usage; bool ret; @@ -2675,7 +3424,7 @@ index 27f5c810436..ace7694a59e 100644 if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) { usage = 0; -@@ -2242,6 +2281,12 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b +@@ -2242,13 +2264,19 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b assert(instr->reg.allocated); @@ -2688,7 +3437,15 @@ index 27f5c810436..ace7694a59e 100644 if (expr->op == HLSL_OP1_CAST) { write_sm1_cast(ctx, buffer, instr); -@@ -2329,7 +2374,23 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + return; + } + +- if (instr->data_type->base_type != HLSL_TYPE_FLOAT) ++ if (instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) + { + /* These 
need to be lowered. */ + hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); +@@ -2329,7 +2357,23 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } break; @@ -2712,7 +3469,7 @@ index 27f5c810436..ace7694a59e 100644 write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); break; -@@ -2488,7 +2549,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * +@@ -2488,7 +2532,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) { @@ -2721,7 +3478,7 @@ index 27f5c810436..ace7694a59e 100644 return; } -@@ -2552,19 +2613,11 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b +@@ -2552,19 +2596,11 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b { if (instr->data_type) { @@ -2843,10 +3600,55 @@ index 37ebc73c099..8a1012d909b 100644 return ret; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 26a8a5c1cc3..220ba773887 100644 +index 26a8a5c1cc3..4943a586680 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -342,6 +342,8 @@ enum dx_intrinsic_opcode +@@ -31,12 +31,16 @@ static const uint64_t GLOBALVAR_FLAG_EXPLICIT_TYPE = 2; + static const unsigned int GLOBALVAR_ADDRESS_SPACE_SHIFT = 2; + static const uint64_t ALLOCA_FLAG_IN_ALLOCA = 0x20; + static const uint64_t ALLOCA_FLAG_EXPLICIT_TYPE = 0x40; +-static const uint64_t ALLOCA_ALIGNMENT_MASK = ALLOCA_FLAG_IN_ALLOCA - 1; ++static const uint64_t ALLOCA_ALIGNMENT_MASK = 0x1f; + static const unsigned int SHADER_DESCRIPTOR_TYPE_COUNT = 4; + static const size_t MAX_IR_INSTRUCTIONS_PER_DXIL_INSTRUCTION = 11; + + static const unsigned int dx_max_thread_group_size[3] = {1024, 1024, 64}; + ++static const unsigned int MAX_GS_INSTANCE_COUNT = 32; /* kMaxGSInstanceCount */ ++static const 
unsigned int MAX_GS_OUTPUT_TOTAL_SCALARS = 1024; /* kMaxGSOutputTotalScalars */ ++static const unsigned int MAX_GS_OUTPUT_STREAMS = 4; ++ + #define VKD3D_SHADER_SWIZZLE_64_MASK \ + (VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(0) \ + | VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(1)) +@@ -103,6 +107,7 @@ enum bitcode_constant_code + CST_CODE_INTEGER = 4, + CST_CODE_FLOAT = 6, + CST_CODE_STRING = 8, ++ CST_CODE_CE_CAST = 11, + CST_CODE_CE_GEP = 12, + CST_CODE_CE_INBOUNDS_GEP = 20, + CST_CODE_DATA = 22, +@@ -282,6 +287,18 @@ enum dxil_element_additional_tag + ADDITIONAL_TAG_USED_MASK = 3, + }; + ++enum dxil_input_primitive ++{ ++ INPUT_PRIMITIVE_UNDEFINED = 0, ++ INPUT_PRIMITIVE_POINT = 1, ++ INPUT_PRIMITIVE_LINE = 2, ++ INPUT_PRIMITIVE_TRIANGLE = 3, ++ INPUT_PRIMITIVE_LINEWITHADJACENCY = 6, ++ INPUT_PRIMITIVE_TRIANGLEWITHADJACENY = 7, ++ INPUT_PRIMITIVE_PATCH1 = 8, ++ INPUT_PRIMITIVE_PATCH32 = 39, ++}; ++ + enum dxil_shader_properties_tag + { + SHADER_PROPERTIES_FLAGS = 0, +@@ -342,6 +359,8 @@ enum dx_intrinsic_opcode { DX_LOAD_INPUT = 4, DX_STORE_OUTPUT = 5, @@ -2855,7 +3657,7 @@ index 26a8a5c1cc3..220ba773887 100644 DX_ISNAN = 8, DX_ISINF = 9, DX_ISFINITE = 10, -@@ -374,8 +376,15 @@ enum dx_intrinsic_opcode +@@ -374,8 +393,15 @@ enum dx_intrinsic_opcode DX_IMIN = 38, DX_UMAX = 39, DX_UMIN = 40, @@ -2871,7 +3673,7 @@ index 26a8a5c1cc3..220ba773887 100644 DX_CREATE_HANDLE = 57, DX_CBUFFER_LOAD_LEGACY = 59, DX_SAMPLE = 60, -@@ -388,14 +397,29 @@ enum dx_intrinsic_opcode +@@ -388,16 +414,54 @@ enum dx_intrinsic_opcode DX_TEXTURE_STORE = 67, DX_BUFFER_LOAD = 68, DX_BUFFER_STORE = 69, @@ -2891,17 +3693,42 @@ index 26a8a5c1cc3..220ba773887 100644 DX_DERIV_COARSEY = 84, DX_DERIV_FINEX = 85, DX_DERIV_FINEY = 86, ++ DX_COVERAGE = 91, + DX_THREAD_ID = 93, + DX_GROUP_ID = 94, + DX_THREAD_ID_IN_GROUP = 95, + DX_FLATTENED_THREAD_ID_IN_GROUP = 96, ++ DX_EMIT_STREAM = 97, ++ DX_CUT_STREAM = 98, ++ DX_EMIT_THEN_CUT_STREAM = 99, + DX_MAKE_DOUBLE = 101, 
DX_SPLIT_DOUBLE = 102, ++ DX_LOAD_OUTPUT_CONTROL_POINT = 103, ++ DX_LOAD_PATCH_CONSTANT = 104, ++ DX_DOMAIN_LOCATION = 105, ++ DX_STORE_PATCH_CONSTANT = 106, ++ DX_OUTPUT_CONTROL_POINT_ID = 107, + DX_PRIMITIVE_ID = 108, ++ DX_WAVE_IS_FIRST_LANE = 110, ++ DX_WAVE_GET_LANE_INDEX = 111, ++ DX_WAVE_GET_LANE_COUNT = 112, ++ DX_WAVE_ANY_TRUE = 113, ++ DX_WAVE_ALL_TRUE = 114, ++ DX_WAVE_ACTIVE_ALL_EQUAL = 115, ++ DX_WAVE_ACTIVE_BALLOT = 116, ++ DX_WAVE_READ_LANE_AT = 117, ++ DX_WAVE_READ_LANE_FIRST = 118, ++ DX_WAVE_ACTIVE_OP = 119, ++ DX_WAVE_ACTIVE_BIT = 120, ++ DX_WAVE_PREFIX_OP = 121, DX_LEGACY_F32TOF16 = 130, DX_LEGACY_F16TOF32 = 131, ++ DX_WAVE_ALL_BIT_COUNT = 135, ++ DX_WAVE_PREFIX_BIT_COUNT = 136, DX_RAW_BUFFER_LOAD = 139, -@@ -449,6 +473,32 @@ enum dxil_predicate + DX_RAW_BUFFER_STORE = 140, + }; +@@ -449,6 +513,32 @@ enum dxil_predicate ICMP_SLE = 41, }; @@ -2934,7 +3761,7 @@ index 26a8a5c1cc3..220ba773887 100644 enum dxil_atomic_binop_code { ATOMIC_BINOP_ADD, -@@ -463,6 +513,14 @@ enum dxil_atomic_binop_code +@@ -463,6 +553,29 @@ enum dxil_atomic_binop_code ATOMIC_BINOP_INVALID, }; @@ -2945,19 +3772,36 @@ index 26a8a5c1cc3..220ba773887 100644 + SYNC_THREAD_GROUP_UAV = 0x4, + SYNC_GROUP_SHARED_MEMORY = 0x8, +}; ++ ++enum dxil_wave_bit_op_kind ++{ ++ WAVE_BIT_OP_AND = 0, ++ WAVE_BIT_OP_OR = 1, ++ WAVE_BIT_OP_XOR = 2, ++}; ++ ++enum dxil_wave_op_kind ++{ ++ WAVE_OP_ADD = 0, ++ WAVE_OP_MUL = 1, ++ WAVE_OP_MIN = 2, ++ WAVE_OP_MAX = 3, ++}; + struct sm6_pointer_info { const struct sm6_type *type; -@@ -541,6 +599,7 @@ struct sm6_value +@@ -541,7 +654,9 @@ struct sm6_value { const struct sm6_type *type; enum sm6_value_type value_type; + unsigned int structure_stride; bool is_undefined; ++ bool is_back_ref; union { -@@ -736,9 +795,11 @@ struct sm6_parser + struct sm6_function_data function; +@@ -736,9 +851,12 @@ struct sm6_parser size_t global_symbol_count; const char *entry_point; @@ -2965,11 +3809,12 @@ index 26a8a5c1cc3..220ba773887 100644 struct 
vkd3d_shader_dst_param *output_params; struct vkd3d_shader_dst_param *input_params; -+ uint32_t io_regs_declared[(VKD3DSPR_COUNT + 0x1f) / 0x20]; ++ struct vkd3d_shader_dst_param *patch_constant_params; ++ uint32_t io_regs_declared[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; struct sm6_function *functions; size_t function_count; -@@ -753,6 +814,7 @@ struct sm6_parser +@@ -753,6 +871,7 @@ struct sm6_parser unsigned int indexable_temp_count; unsigned int icb_count; @@ -2977,7 +3822,19 @@ index 26a8a5c1cc3..220ba773887 100644 struct sm6_value *values; size_t value_count; -@@ -1876,6 +1938,25 @@ static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type +@@ -790,11 +909,6 @@ static size_t size_add_with_overflow_check(size_t a, size_t b) + return (i < a) ? SIZE_MAX : i; + } + +-static struct sm6_parser *sm6_parser(struct vkd3d_shader_parser *parser) +-{ +- return CONTAINING_RECORD(parser, struct sm6_parser, p); +-} +- + static bool sm6_parser_is_end(struct sm6_parser *sm6) + { + return sm6->ptr == sm6->end; +@@ -1876,6 +1990,25 @@ static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type return NULL; } @@ -3003,7 +3860,7 @@ index 26a8a5c1cc3..220ba773887 100644 /* Call for aggregate types only. 
*/ static const struct sm6_type *sm6_type_get_element_type_at_index(const struct sm6_type *type, uint64_t elem_idx) { -@@ -2110,6 +2191,15 @@ static inline bool sm6_value_is_undef(const struct sm6_value *value) +@@ -2110,6 +2243,15 @@ static inline bool sm6_value_is_undef(const struct sm6_value *value) return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; } @@ -3019,7 +3876,37 @@ index 26a8a5c1cc3..220ba773887 100644 static bool sm6_value_is_icb(const struct sm6_value *value) { return value->value_type == VALUE_TYPE_ICB; -@@ -2199,6 +2289,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type +@@ -2120,6 +2262,11 @@ static bool sm6_value_is_ssa(const struct sm6_value *value) + return sm6_value_is_register(value) && register_is_ssa(&value->u.reg); + } + ++static bool sm6_value_is_numeric_array(const struct sm6_value *value) ++{ ++ return sm6_value_is_register(value) && register_is_numeric_array(&value->u.reg); ++} ++ + static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *value) + { + if (!sm6_value_is_constant(value)) +@@ -2153,7 +2300,7 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ + { + struct vkd3d_shader_src_param *params; + +- if (!(params = vsir_program_get_src_params(&sm6->p.program, count))) ++ if (!(params = vsir_program_get_src_params(sm6->p.program, count))) + { + ERR("Failed to allocate src params.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, +@@ -2170,7 +2317,7 @@ static struct vkd3d_shader_dst_param *instruction_dst_params_alloc(struct vkd3d_ + { + struct vkd3d_shader_dst_param *params; + +- if (!(params = vsir_program_get_dst_params(&sm6->p.program, count))) ++ if (!(params = vsir_program_get_dst_params(sm6->p.program, count))) + { + ERR("Failed to allocate dst params.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, +@@ -2199,6 +2346,8 @@ static enum vkd3d_data_type 
vkd3d_data_type_from_sm6_type(const struct sm6_type return VKD3D_DATA_BOOL; case 8: return VKD3D_DATA_UINT8; @@ -3028,7 +3915,7 @@ index 26a8a5c1cc3..220ba773887 100644 case 32: return VKD3D_DATA_UINT; case 64: -@@ -2212,6 +2304,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type +@@ -2212,6 +2361,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type { switch (type->u.width) { @@ -3037,7 +3924,7 @@ index 26a8a5c1cc3..220ba773887 100644 case 32: return VKD3D_DATA_FLOAT; case 64: -@@ -2252,6 +2346,12 @@ static void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const st +@@ -2252,6 +2403,12 @@ static void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const st register_init_ssa_vector(reg, sm6_type_get_scalar_type(type, 0), 1, value, sm6); } @@ -3050,7 +3937,7 @@ index 26a8a5c1cc3..220ba773887 100644 static void dst_param_init(struct vkd3d_shader_dst_param *param) { param->write_mask = VKD3DSP_WRITEMASK_0; -@@ -2301,6 +2401,12 @@ static void src_param_init_scalar(struct vkd3d_shader_src_param *param, unsigned +@@ -2301,6 +2458,12 @@ static void src_param_init_scalar(struct vkd3d_shader_src_param *param, unsigned param->modifiers = VKD3DSPSM_NONE; } @@ -3063,7 +3950,7 @@ index 26a8a5c1cc3..220ba773887 100644 static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src) { src_param_init(param); -@@ -2315,6 +2421,12 @@ static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param, +@@ -2315,20 +2478,28 @@ static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param, param->reg = *reg; } @@ -3076,7 +3963,24 @@ index 26a8a5c1cc3..220ba773887 100644 static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address, struct sm6_parser *sm6) { -@@ -2336,14 +2448,18 @@ static void register_index_address_init(struct vkd3d_shader_register_index *idx, + if 
(sm6_value_is_constant(address)) + { + idx->offset = sm6_value_get_constant_uint(address); ++ idx->rel_addr = NULL; + } + else if (sm6_value_is_undef(address)) + { + idx->offset = 0; ++ idx->rel_addr = NULL; + } + else + { +- struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(&sm6->p.program, 1); ++ struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(sm6->p.program, 1); + if (rel_addr) + src_param_init_from_value(rel_addr, address); + idx->offset = 0; +@@ -2336,14 +2507,18 @@ static void register_index_address_init(struct vkd3d_shader_register_index *idx, } } @@ -3097,7 +4001,16 @@ index 26a8a5c1cc3..220ba773887 100644 } static void instruction_dst_param_init_ssa_vector(struct vkd3d_shader_instruction *ins, -@@ -2482,6 +2598,26 @@ static bool sm6_value_validate_is_texture_handle(const struct sm6_value *value, +@@ -2399,7 +2574,7 @@ static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, + * overestimate the value count somewhat, but this should be no problem. */ + value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); + sm6->value_capacity = max(sm6->value_capacity, value_count); +- sm6->functions[sm6->function_count].value_count = value_count; ++ sm6->functions[sm6->function_count++].value_count = value_count; + /* The value count returns to its previous value after handling a function. 
*/ + if (value_count < SIZE_MAX) + value_count = old_value_count; +@@ -2482,6 +2657,26 @@ static bool sm6_value_validate_is_texture_handle(const struct sm6_value *value, return true; } @@ -3124,7 +4037,26 @@ index 26a8a5c1cc3..220ba773887 100644 static bool sm6_value_validate_is_sampler_handle(const struct sm6_value *value, enum dx_intrinsic_opcode op, struct sm6_parser *sm6) { -@@ -2539,6 +2675,30 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6 +@@ -2514,6 +2709,18 @@ static bool sm6_value_validate_is_pointer(const struct sm6_value *value, struct + return true; + } + ++static bool sm6_value_validate_is_backward_ref(const struct sm6_value *value, struct sm6_parser *sm6) ++{ ++ if (!value->is_back_ref) ++ { ++ FIXME("Forward-referenced pointers are not supported.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Forward-referenced pointer declarations are not supported."); ++ return false; ++ } ++ return true; ++} ++ + static bool sm6_value_validate_is_numeric(const struct sm6_value *value, struct sm6_parser *sm6) + { + if (!sm6_type_is_numeric(value->type)) +@@ -2539,6 +2746,30 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6 return true; } @@ -3155,7 +4087,7 @@ index 26a8a5c1cc3..220ba773887 100644 static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) { if (idx < sm6->value_count) -@@ -2686,7 +2846,7 @@ static inline uint64_t decode_rotated_signed_value(uint64_t value) +@@ -2686,7 +2917,7 @@ static inline uint64_t decode_rotated_signed_value(uint64_t value) return value << 63; } @@ -3164,7 +4096,7 @@ index 26a8a5c1cc3..220ba773887 100644 { union { -@@ -2710,6 +2870,23 @@ static inline double bitcast_uint64_to_double(uint64_t value) +@@ -2710,6 +2941,23 @@ static inline double bitcast_uint64_to_double(uint64_t value) return u.double_value; } @@ -3188,7 +4120,16 @@ index 26a8a5c1cc3..220ba773887 100644 static enum 
vkd3d_result value_allocate_constant_array(struct sm6_value *dst, const struct sm6_type *type, const uint64_t *operands, struct sm6_parser *sm6) { -@@ -2782,6 +2959,122 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co +@@ -2745,7 +2993,7 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co + "Out of memory allocating an immediate constant buffer of count %u.", count); + return VKD3D_ERROR_OUT_OF_MEMORY; + } +- if (!shader_instruction_array_add_icb(&sm6->p.program.instructions, icb)) ++ if (!shader_instruction_array_add_icb(&sm6->p.program->instructions, icb)) + { + ERR("Failed to store icb object.\n"); + vkd3d_free(icb); +@@ -2782,18 +3030,135 @@ static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, co return VKD3D_OK; } @@ -3311,7 +4252,31 @@ index 26a8a5c1cc3..220ba773887 100644 static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) { enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; -@@ -2876,9 +3169,9 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const +- const struct sm6_type *type, *elem_type; ++ const struct sm6_type *type, *elem_type, *ptr_type; ++ size_t i, base_value_idx, value_idx; + enum vkd3d_data_type reg_data_type; + const struct dxil_record *record; ++ const struct sm6_value *src; + enum vkd3d_result ret; + struct sm6_value *dst; +- size_t i, value_idx; + uint64_t value; + +- for (i = 0, type = NULL; i < block->record_count; ++i) ++ for (i = 0, type = NULL, base_value_idx = sm6->value_count; i < block->record_count; ++i) + { + sm6->p.location.column = i; + record = block->records[i]; +@@ -2834,6 +3199,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const + dst = sm6_parser_get_current_value(sm6); + dst->type = type; + dst->value_type = VALUE_TYPE_REG; ++ dst->is_back_ref = true; + vsir_register_init(&dst->u.reg, reg_type, reg_data_type, 
0); + + switch (record->code) +@@ -2876,9 +3242,9 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const } if (type->u.width == 16) @@ -3323,7 +4288,7 @@ index 26a8a5c1cc3..220ba773887 100644 else if (type->u.width == 64) dst->u.reg.u.immconst_f64[0] = bitcast_uint64_to_double(record->operands[0]); else -@@ -2902,6 +3195,12 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const +@@ -2902,6 +3268,54 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const break; @@ -3332,11 +4297,53 @@ index 26a8a5c1cc3..220ba773887 100644 + if ((ret = sm6_parser_init_constexpr_gep(sm6, record, dst)) < 0) + return ret; + break; ++ ++ case CST_CODE_CE_CAST: ++ if (!dxil_record_validate_operand_count(record, 3, 3, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if ((value = record->operands[0]) != CAST_BITCAST) ++ { ++ WARN("Unhandled constexpr cast op %"PRIu64".\n", value); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Constexpr cast op %"PRIu64" is unhandled.", value); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ ptr_type = sm6_parser_get_type(sm6, record->operands[1]); ++ if (!sm6_type_is_pointer(ptr_type)) ++ { ++ WARN("Constexpr cast at constant idx %zu is not a pointer.\n", value_idx); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Constexpr cast source operand is not a pointer."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if ((value = record->operands[2]) >= sm6->cur_max_value) ++ { ++ WARN("Invalid value index %"PRIu64".\n", value); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid value index %"PRIu64".", value); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ else if (value == value_idx) ++ { ++ WARN("Invalid value self-reference at %"PRIu64".\n", value); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid value self-reference for 
a constexpr cast."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ /* Resolve later in case forward refs exist. */ ++ dst->type = type; ++ dst->u.reg.type = VKD3DSPR_COUNT; ++ dst->u.reg.idx[0].offset = value; ++ break; + case CST_CODE_UNDEF: dxil_record_validate_operand_max_count(record, 0, sm6); dst->u.reg.type = VKD3DSPR_UNDEF; -@@ -2911,6 +3210,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const +@@ -2911,6 +3325,8 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const default: FIXME("Unhandled constant code %u.\n", record->code); @@ -3345,7 +4352,63 @@ index 26a8a5c1cc3..220ba773887 100644 dst->u.reg.type = VKD3DSPR_UNDEF; break; } -@@ -2994,6 +3295,58 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru +@@ -2925,6 +3341,29 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const + ++sm6->value_count; + } + ++ /* Resolve cast forward refs. */ ++ for (i = base_value_idx; i < sm6->value_count; ++i) ++ { ++ dst = &sm6->values[i]; ++ if (dst->u.reg.type != VKD3DSPR_COUNT) ++ continue; ++ ++ type = dst->type; ++ ++ src = &sm6->values[dst->u.reg.idx[0].offset]; ++ if (!sm6_value_is_numeric_array(src)) ++ { ++ WARN("Value is not an array.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Constexpr cast source value is not a global array element."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ *dst = *src; ++ dst->type = type; ++ dst->u.reg.data_type = vkd3d_data_type_from_sm6_type(type->u.pointer.type); ++ } ++ + return VKD3D_OK; + } + +@@ -2941,12 +3380,14 @@ static bool bitcode_parse_alignment(uint64_t encoded_alignment, unsigned int *al + + static struct vkd3d_shader_instruction *sm6_parser_require_space(struct sm6_parser *sm6, size_t extra) + { +- if (!shader_instruction_array_reserve(&sm6->p.program.instructions, sm6->p.program.instructions.count + extra)) ++ struct vkd3d_shader_instruction_array 
*instructions = &sm6->p.program->instructions; ++ ++ if (!shader_instruction_array_reserve(instructions, instructions->count + extra)) + { + ERR("Failed to allocate instruction.\n"); + return NULL; + } +- return &sm6->p.program.instructions.elements[sm6->p.program.instructions.count]; ++ return &instructions->elements[instructions->count]; + } + + /* Space should be reserved before calling this. It is intended to require no checking of the returned pointer. */ +@@ -2956,7 +3397,7 @@ static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_pa + struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); + assert(ins); + vsir_instruction_init(ins, &sm6->p.location, handler_idx); +- ++sm6->p.program.instructions.count; ++ ++sm6->p.program->instructions.count; + return ins; + } + +@@ -2994,6 +3435,58 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx); } @@ -3404,7 +4467,15 @@ index 26a8a5c1cc3..220ba773887 100644 static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record) { const struct sm6_type *type, *scalar_type; -@@ -3119,10 +3472,17 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ +@@ -3101,6 +3594,7 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ + dst = sm6_parser_get_current_value(sm6); + dst->type = type; + dst->value_type = VALUE_TYPE_REG; ++ dst->is_back_ref = true; + + if (is_constant && !init) + { +@@ -3119,10 +3613,17 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ } else if (address_space == ADDRESS_SPACE_GROUPSHARED) { @@ -3426,14 +4497,23 @@ index 26a8a5c1cc3..220ba773887 100644 } else { -@@ -3158,6 +3518,38 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init +@@ -3158,17 +3659,49 @@ static const struct 
vkd3d_shader_immediate_constant_buffer *resolve_forward_init return NULL; } +-static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) +static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm6) -+{ + { +- size_t i, count, base_value_idx = sm6->value_count; +- const struct dxil_block *block = &sm6->root_block; +- struct vkd3d_shader_instruction *ins; +- const struct dxil_record *record; +- enum vkd3d_result ret; +- uint64_t version; + const struct sm6_value *value; -+ + +- sm6->p.location.line = block->id; +- sm6->p.location.column = 0; + if (!index) + return false; + @@ -3462,10 +4542,33 @@ index 26a8a5c1cc3..220ba773887 100644 + return false; +} + - static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) - { - size_t i, count, base_value_idx = sm6->value_count; -@@ -3231,6 +3623,16 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) ++static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) ++{ ++ size_t i, count, base_value_idx = sm6->value_count; ++ const struct dxil_block *block = &sm6->root_block; ++ struct vkd3d_shader_instruction *ins; ++ const struct dxil_record *record; ++ enum vkd3d_result ret; ++ uint64_t version; ++ ++ sm6->p.location.line = block->id; ++ sm6->p.location.column = 0; + + for (i = 0, count = 0; i < block->record_count; ++i) + count += block->records[i]->code == MODULE_CODE_GLOBALVAR; +@@ -3219,9 +3752,9 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) + } + + /* Resolve initialiser forward references. 
*/ +- for (i = 0; i < sm6->p.program.instructions.count; ++i) ++ for (i = 0; i < sm6->p.program->instructions.count; ++i) + { +- ins = &sm6->p.program.instructions.elements[i]; ++ ins = &sm6->p.program->instructions.elements[i]; + if (ins->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) + { + ins->declaration.indexable_temp.initialiser = resolve_forward_initialiser( +@@ -3231,6 +3764,16 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) { ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); } @@ -3482,7 +4585,7 @@ index 26a8a5c1cc3..220ba773887 100644 } for (i = base_value_idx; i < sm6->value_count; ++i) { -@@ -3270,6 +3672,22 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par +@@ -3270,22 +3813,80 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par src_param_init_from_value(&src_params[i], operands[i]); } @@ -3491,6 +4594,8 @@ index 26a8a5c1cc3..220ba773887 100644 +{ + switch (sysval_semantic) + { ++ case VKD3D_SHADER_SV_COVERAGE: ++ return VKD3DSPR_COVERAGE; + case VKD3D_SHADER_SV_DEPTH: + return VKD3DSPR_DEPTHOUT; + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: @@ -3503,9 +4608,38 @@ index 26a8a5c1cc3..220ba773887 100644 +} + static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, - enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) +- enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) ++ bool is_input, enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) { -@@ -3282,6 +3700,13 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade ++ enum vkd3d_shader_type shader_type = sm6->p.program->shader_version.type; ++ bool is_patch_constant, is_control_point; + struct vkd3d_shader_dst_param *param; + const struct signature_element *e; + unsigned int i, 
count; + ++ is_patch_constant = reg_type == VKD3DSPR_PATCHCONST; ++ ++ is_control_point = false; ++ if (!is_patch_constant) ++ { ++ switch (shader_type) ++ { ++ case VKD3D_SHADER_TYPE_DOMAIN: ++ case VKD3D_SHADER_TYPE_GEOMETRY: ++ is_control_point = is_input; ++ break; ++ ++ case VKD3D_SHADER_TYPE_HULL: ++ is_control_point = true; ++ break; ++ ++ default: ++ break; ++ } ++ } ++ + for (i = 0; i < s->element_count; ++i) + { e = &s->elements[i]; param = &params[i]; @@ -3518,8 +4652,47 @@ index 26a8a5c1cc3..220ba773887 100644 + dst_param_io_init(param, e, reg_type); count = 0; - if (e->register_count > 1) -@@ -3350,6 +3775,9 @@ struct function_emission_state +- if (e->register_count > 1) ++ ++ if (is_control_point) ++ { ++ if (reg_type == VKD3DSPR_OUTPUT) ++ param->reg.idx[count].rel_addr = instruction_array_create_outpointid_param(&sm6->p.program->instructions); ++ param->reg.idx[count++].offset = 0; ++ } ++ ++ if (e->register_count > 1 || (is_patch_constant && vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) + param->reg.idx[count++].offset = 0; ++ ++ assert(count < ARRAY_SIZE(param->reg.idx)); + param->reg.idx[count++].offset = i; + param->reg.idx_count = count; + } +@@ -3293,12 +3894,21 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade + + static void sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) + { +- sm6_parser_init_signature(sm6, output_signature, VKD3DSPR_OUTPUT, sm6->output_params); ++ sm6_parser_init_signature(sm6, output_signature, false, VKD3DSPR_OUTPUT, sm6->output_params); + } + + static void sm6_parser_init_input_signature(struct sm6_parser *sm6, const struct shader_signature *input_signature) + { +- sm6_parser_init_signature(sm6, input_signature, VKD3DSPR_INPUT, sm6->input_params); ++ sm6_parser_init_signature(sm6, input_signature, true, VKD3DSPR_INPUT, sm6->input_params); ++} ++ ++static void sm6_parser_init_patch_constant_signature(struct sm6_parser
*sm6, ++ const struct shader_signature *patch_constant_signature) ++{ ++ bool is_input = sm6->p.program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; ++ ++ sm6_parser_init_signature(sm6, patch_constant_signature, is_input, VKD3DSPR_PATCHCONST, ++ sm6->patch_constant_params); + } + + static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) +@@ -3350,6 +3960,9 @@ struct function_emission_state unsigned int temp_idx; }; @@ -3529,33 +4702,14 @@ index 26a8a5c1cc3..220ba773887 100644 static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { -@@ -3425,30 +3853,153 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec +@@ -3425,6 +4038,130 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec sm6_parser_declare_indexable_temp(sm6, elem_type, type[0]->u.array.count, alignment, true, 0, ins, dst); } --static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, -- const struct sm6_type *type_b, struct sm6_parser *sm6) +static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code) - { -- bool is_int = sm6_type_is_bool_i16_i32_i64(type_a); -- bool is_bool = sm6_type_is_bool(type_a); -- enum vkd3d_shader_opcode op; -- bool is_valid; -- -- if (!is_int && !sm6_type_is_floating_point(type_a)) -- { -- WARN("Argument type %u is not bool, int16/32/64 or floating point.\n", type_a->class); -- vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -- "An argument to a binary operation is not bool, int16/32/64 or floating point."); -- return VKD3DSIH_INVALID; -- } -- if (type_a != type_b) ++{ + switch (code) - { -- WARN("Type mismatch, type %u width %u vs type %u width %u.\n", type_a->class, -- type_a->u.width, type_b->class, type_b->u.width); -- vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, -- "Type 
mismatch in binary operation arguments."); ++ { + case RMW_ADD: + return VKD3DSIH_IMM_ATOMIC_IADD; + case RMW_AND: @@ -3577,10 +4731,9 @@ index 26a8a5c1cc3..220ba773887 100644 + default: + /* DXIL currently doesn't use SUB and NAND. */ + return VKD3DSIH_INVALID; - } ++ } +} - -- switch (code) ++ +static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record, + struct function_emission_state *state, struct sm6_value *dst) +{ @@ -3596,7 +4749,8 @@ index 26a8a5c1cc3..220ba773887 100644 + uint64_t code; + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) -+ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) ++ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6) ++ || !sm6_value_validate_is_backward_ref(ptr, sm6)) + return; + + if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) @@ -3676,34 +4830,64 @@ index 26a8a5c1cc3..220ba773887 100644 + dst->u.reg = dst_params[0].reg; +} + -+static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, -+ const struct sm6_type *type_b, struct sm6_parser *sm6) + static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, + const struct sm6_type *type_b, struct sm6_parser *sm6) + { +@@ -3756,6 +4493,25 @@ static bool sm6_parser_emit_coordinate_construct(struct sm6_parser *sm6, const s + return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg); + } + ++static enum vkd3d_shader_opcode sm6_dx_map_void_op(enum dx_intrinsic_opcode op) +{ -+ bool is_int = sm6_type_is_bool_i16_i32_i64(type_a); -+ bool is_bool = sm6_type_is_bool(type_a); -+ enum vkd3d_shader_opcode op; -+ bool is_valid; -+ -+ if (!is_int && !sm6_type_is_floating_point(type_a)) ++ switch (op) + { -+ WARN("Argument type %u is not bool, int16/32/64 or floating point.\n", type_a->class); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "An argument to a binary operation is not bool, int16/32/64 or 
floating point."); -+ return VKD3DSIH_INVALID; -+ } -+ if (type_a != type_b) -+ { -+ WARN("Type mismatch, type %u width %u vs type %u width %u.\n", type_a->class, -+ type_a->u.width, type_b->class, type_b->u.width); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, -+ "Type mismatch in binary operation arguments."); ++ case DX_WAVE_IS_FIRST_LANE: ++ return VKD3DSIH_WAVE_IS_FIRST_LANE; ++ default: ++ vkd3d_unreachable(); + } ++} + -+ switch (code) - { - case BINOP_ADD: - case BINOP_SUB: -@@ -3974,6 +4525,98 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr ++static void sm6_parser_emit_dx_void(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ vsir_instruction_init(ins, &sm6->p.location, sm6_dx_map_void_op(op)); ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ + static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) + { + switch (op) +@@ -3820,6 +4576,18 @@ static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) + return VKD3DSIH_F32TOF16; + case DX_LEGACY_F16TOF32: + return VKD3DSIH_F16TOF32; ++ case DX_WAVE_ACTIVE_ALL_EQUAL: ++ return VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL; ++ case DX_WAVE_ALL_BIT_COUNT: ++ return VKD3DSIH_WAVE_ALL_BIT_COUNT; ++ case DX_WAVE_ALL_TRUE: ++ return VKD3DSIH_WAVE_ALL_TRUE; ++ case DX_WAVE_ANY_TRUE: ++ return VKD3DSIH_WAVE_ANY_TRUE; ++ case DX_WAVE_PREFIX_BIT_COUNT: ++ return VKD3DSIH_WAVE_PREFIX_BIT_COUNT; ++ case DX_WAVE_READ_LANE_FIRST: ++ return VKD3DSIH_WAVE_READ_LANE_FIRST; + default: + vkd3d_unreachable(); + } +@@ -3855,6 +4623,8 @@ static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co + return VKD3DSIH_UMAX; + case DX_UMIN: + return VKD3DSIH_UMIN; ++ case DX_WAVE_READ_LANE_AT: ++ return VKD3DSIH_WAVE_READ_LANE_AT; + default: + vkd3d_unreachable(); + } +@@ 
-3974,6 +4744,98 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr dst->u.reg = dst_params[0].reg; } @@ -3802,7 +4986,7 @@ index 26a8a5c1cc3..220ba773887 100644 static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { -@@ -4004,6 +4647,38 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr +@@ -4004,6 +4866,44 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr instruction_dst_param_init_ssa_vector(ins, sm6_type_max_vector_size(type), sm6); } @@ -3837,14 +5021,69 @@ index 26a8a5c1cc3..220ba773887 100644 + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} ++ ++static void sm6_parser_emit_dx_coverage(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_COVERAGE, VKD3D_DATA_UINT); ++} + static const struct sm6_descriptor_info *sm6_parser_get_descriptor(struct sm6_parser *sm6, enum vkd3d_shader_descriptor_type type, unsigned int id, const struct sm6_value *address) { -@@ -4065,6 +4740,149 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int +@@ -4065,31 +4965,233 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int ins->handler_idx = VKD3DSIH_NOP; } +-static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++static void sm6_parser_emit_dx_stream(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { + struct vkd3d_shader_instruction *ins = state->ins; +- struct vkd3d_shader_src_param *src_params; +- unsigned int is_texture, component_count; +- enum dxil_resource_kind resource_kind; +- struct vkd3d_shader_dst_param *dst; +- const 
struct sm6_value *resource; +- +- resource = operands[0]; +- if (!sm6_value_validate_is_handle(resource, sm6)) +- return; +- is_texture = resource->u.handle.d->resource_type != VKD3D_SHADER_RESOURCE_BUFFER; +- resource_kind = resource->u.handle.d->kind; ++ struct vkd3d_shader_src_param *src_param; ++ unsigned int i; + +- instruction_init_with_resource(ins, is_texture ? VKD3DSIH_RESINFO : VKD3DSIH_BUFINFO, resource, sm6); ++ vsir_instruction_init(ins, &sm6->p.location, (op == DX_CUT_STREAM) ? VKD3DSIH_CUT_STREAM : VKD3DSIH_EMIT_STREAM); + +- if (!(src_params = instruction_src_params_alloc(ins, 1 + is_texture, sm6))) ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; +- src_param_init_vector_from_reg(&src_params[is_texture], &resource->u.handle.reg); + +- if (is_texture) ++ i = sm6_value_get_constant_uint(operands[0]); ++ if (i >= MAX_GS_OUTPUT_STREAMS) + { +- ins->flags = VKD3DSI_RESINFO_UINT; ++ WARN("Invalid stream index %u.\n", i); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Output stream index %u is invalid.", i); ++ } ++ ++ /* VKD3D_DATA_UNUSED would be more reasonable, but TPF uses data type 0 here. 
*/ ++ register_init_with_id(&src_param->reg, VKD3DSPR_STREAM, 0, i); ++ src_param_init(src_param); ++ ++ if (op == DX_EMIT_THEN_CUT_STREAM) ++ { ++ ++state->ins; ++ ++state->code_block->instruction_count; ++ sm6_parser_emit_dx_stream(sm6, DX_CUT_STREAM, operands, state); ++ } ++} ++ +static void sm6_parser_emit_dx_discard(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ @@ -3857,6 +5096,33 @@ index 26a8a5c1cc3..220ba773887 100644 + src_param_init_from_value(src_param, operands[0]); +} + ++static void sm6_parser_emit_dx_domain_location(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ unsigned int component_idx; ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); ++ ++ if ((component_idx = sm6_value_get_constant_uint(operands[0])) >= 3) ++ { ++ WARN("Invalid component index %u.\n", component_idx); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid domain location component index %u.", component_idx); ++ component_idx = 0; ++ } ++ ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_INPUT, VKD3DSPR_TESSCOORD, VKD3D_DATA_FLOAT, 3); ++ vsir_register_init(&src_param->reg, VKD3DSPR_TESSCOORD, VKD3D_DATA_FLOAT, 0); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; ++ src_param_init_scalar(src_param, component_idx); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ +static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ @@ -3988,22 +5254,103 @@ index 26a8a5c1cc3..220ba773887 100644 + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + - static void 
sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_params; ++ unsigned int is_texture, component_count; ++ enum dxil_resource_kind resource_kind; ++ struct vkd3d_shader_dst_param *dst; ++ const struct sm6_value *resource; ++ ++ resource = operands[0]; ++ if (!sm6_value_validate_is_handle(resource, sm6)) ++ return; ++ is_texture = resource->u.handle.d->resource_type != VKD3D_SHADER_RESOURCE_BUFFER; ++ resource_kind = resource->u.handle.d->kind; ++ ++ instruction_init_with_resource(ins, is_texture ? VKD3DSIH_RESINFO : VKD3DSIH_BUFINFO, resource, sm6); ++ ++ if (!(src_params = instruction_src_params_alloc(ins, 1 + is_texture, sm6))) ++ return; ++ src_param_init_vector_from_reg(&src_params[is_texture], &resource->u.handle.reg); ++ ++ if (is_texture) ++ { ++ ins->flags = VKD3DSI_RESINFO_UINT; + src_param_init_from_value(&src_params[0], operands[1]); + component_count = VKD3D_VEC4_SIZE; + +@@ -4171,18 +5273,44 @@ static void sm6_parser_emit_dx_tertiary(struct sm6_parser *sm6, enum dx_intrinsi + static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { -@@ -4182,7 +5000,7 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin ++ bool is_control_point = op == DX_LOAD_OUTPUT_CONTROL_POINT; ++ bool is_patch_constant = op == DX_LOAD_PATCH_CONSTANT; + struct vkd3d_shader_instruction *ins = state->ins; ++ struct vsir_program *program = sm6->p.program; ++ unsigned int count, row_index, column_index; ++ const struct vkd3d_shader_dst_param *params; + struct vkd3d_shader_src_param *src_param; + const struct shader_signature *signature; +- 
unsigned int row_index, column_index; + const struct signature_element *e; + row_index = sm6_value_get_constant_uint(operands[0]); + column_index = sm6_value_get_constant_uint(operands[2]); + ++ if (is_control_point && operands[3]->is_undefined) ++ { ++ /* dxcompiler will compile source which does this, so let it pass. */ ++ WARN("Control point id is undefined.\n"); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNDEFINED_OPERAND, ++ "The index for a control point load is undefined."); ++ } ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); - signature = &sm6->p.shader_desc.input_signature; -+ signature = &sm6->p.program.input_signature; ++ if (is_patch_constant) ++ { ++ signature = &program->patch_constant_signature; ++ params = sm6->patch_constant_params; ++ } ++ else if (is_control_point) ++ { ++ signature = &program->output_signature; ++ params = sm6->output_params; ++ } ++ else ++ { ++ signature = &program->input_signature; ++ params = sm6->input_params; ++ } if (row_index >= signature->element_count) { WARN("Invalid row index %u.\n", row_index); -@@ -4202,6 +5020,32 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin - instruction_dst_param_init_ssa_scalar(ins, sm6); - } +@@ -4194,14 +5322,54 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; +- src_param->reg = sm6->input_params[row_index].reg; ++ src_param->reg = params[row_index].reg; + src_param_init_scalar(src_param, column_index); ++ count = 0; ++ + if (e->register_count > 1) +- register_index_address_init(&src_param->reg.idx[0], operands[1], sm6); ++ register_index_address_init(&src_param->reg.idx[count++], operands[1], sm6); ++ ++ if (!is_patch_constant && !operands[3]->is_undefined) ++ { ++ assert(src_param->reg.idx_count > count); ++ register_index_address_init(&src_param->reg.idx[count], operands[3], sm6); ++ } ++ ++ 
instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ +static void sm6_parser_emit_dx_make_double(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ @@ -4020,8 +5367,14 @@ index 26a8a5c1cc3..220ba773887 100644 + return; + src_params[0].reg = reg; + src_param_init_vector(&src_params[0], 2); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); + + instruction_dst_param_init_ssa_scalar(ins, sm6); + } + ++static void sm6_parser_emit_dx_output_control_point_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT); +} + +static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -4033,7 +5386,7 @@ index 26a8a5c1cc3..220ba773887 100644 static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { -@@ -4424,6 +5268,59 @@ static void sm6_parser_emit_dx_buffer_store(struct sm6_parser *sm6, enum dx_intr +@@ -4424,6 +5592,59 @@ static void sm6_parser_emit_dx_buffer_store(struct sm6_parser *sm6, enum dx_intr dst_param->reg = resource->u.handle.reg; } @@ -4093,7 +5446,7 @@ index 26a8a5c1cc3..220ba773887 100644 static unsigned int sm6_value_get_texel_offset(const struct sm6_value *value) { return sm6_value_is_undef(value) ? 
0 : sm6_value_get_constant_uint(value); -@@ -4521,6 +5418,21 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ +@@ -4521,6 +5742,21 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ instruction_dst_param_init_ssa_vector(ins, component_count, sm6); } @@ -4115,16 +5468,31 @@ index 26a8a5c1cc3..220ba773887 100644 static void sm6_parser_emit_dx_sincos(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { -@@ -4572,7 +5484,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr +@@ -4561,7 +5797,9 @@ static void sm6_parser_emit_dx_split_double(struct sm6_parser *sm6, enum dx_intr + static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { ++ bool is_patch_constant = op == DX_STORE_PATCH_CONSTANT; + struct vkd3d_shader_instruction *ins = state->ins; ++ struct vsir_program *program = sm6->p.program; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_dst_param *dst_param; + const struct shader_signature *signature; +@@ -4572,7 +5810,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr row_index = sm6_value_get_constant_uint(operands[0]); column_index = sm6_value_get_constant_uint(operands[2]); - signature = &sm6->p.shader_desc.output_signature; -+ signature = &sm6->p.program.output_signature; ++ signature = is_patch_constant ? 
&program->patch_constant_signature : &program->output_signature; if (row_index >= signature->element_count) { WARN("Invalid row index %u.\n", row_index); -@@ -4608,10 +5520,78 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr +@@ -4604,14 +5842,82 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr + if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) + return; + dst_param_init_scalar(dst_param, column_index); +- dst_param->reg = sm6->output_params[row_index].reg; ++ dst_param->reg = is_patch_constant ? sm6->patch_constant_params[row_index].reg : sm6->output_params[row_index].reg; if (e->register_count > 1) register_index_address_init(&dst_param->reg.idx[0], operands[1], sm6); @@ -4203,7 +5571,155 @@ index 26a8a5c1cc3..220ba773887 100644 static void sm6_parser_emit_dx_texture_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { -@@ -4741,9 +5721,12 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = +@@ -4707,6 +6013,131 @@ static void sm6_parser_emit_dx_texture_store(struct sm6_parser *sm6, enum dx_int + dst_param_init_with_mask(dst_param, write_mask); + } + ++static void sm6_parser_emit_dx_wave_active_ballot(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_WAVE_ACTIVE_BALLOT); ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ src_param_init_from_value(src_param, operands[0]); ++ ++ instruction_dst_param_init_ssa_vector(ins, VKD3D_VEC4_SIZE, sm6); ++} ++ ++static enum vkd3d_shader_opcode sm6_dx_map_wave_bit_op(enum dxil_wave_bit_op_kind op, ++ struct sm6_parser *sm6) ++{ ++ switch (op) ++ { ++ case WAVE_BIT_OP_AND: ++ return 
VKD3DSIH_WAVE_ACTIVE_BIT_AND; ++ case WAVE_BIT_OP_OR: ++ return VKD3DSIH_WAVE_ACTIVE_BIT_OR; ++ case WAVE_BIT_OP_XOR: ++ return VKD3DSIH_WAVE_ACTIVE_BIT_XOR; ++ default: ++ FIXME("Unhandled wave bit op %u.\n", op); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, ++ "Wave bit operation %u is unhandled.\n", op); ++ return VKD3DSIH_INVALID; ++ } ++} ++ ++static void sm6_parser_emit_dx_wave_active_bit(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ enum dxil_wave_bit_op_kind wave_op; ++ enum vkd3d_shader_opcode opcode; ++ ++ wave_op = sm6_value_get_constant_uint(operands[1]); ++ ++ if ((opcode = sm6_dx_map_wave_bit_op(wave_op, sm6)) == VKD3DSIH_INVALID) ++ return; ++ vsir_instruction_init(ins, &sm6->p.location, opcode); ++ ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ src_param_init_from_value(src_param, operands[0]); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ ++static enum vkd3d_shader_opcode sm6_dx_map_wave_op(enum dxil_wave_op_kind op, bool is_signed, bool is_float, ++ struct sm6_parser *sm6) ++{ ++ switch (op) ++ { ++ case WAVE_OP_ADD: ++ return VKD3DSIH_WAVE_OP_ADD; ++ case WAVE_OP_MUL: ++ return VKD3DSIH_WAVE_OP_MUL; ++ case WAVE_OP_MIN: ++ if (is_float) ++ return VKD3DSIH_WAVE_OP_MIN; ++ return is_signed ? VKD3DSIH_WAVE_OP_IMIN : VKD3DSIH_WAVE_OP_UMIN; ++ case WAVE_OP_MAX: ++ if (is_float) ++ return VKD3DSIH_WAVE_OP_MAX; ++ return is_signed ? 
VKD3DSIH_WAVE_OP_IMAX : VKD3DSIH_WAVE_OP_UMAX; ++ default: ++ FIXME("Unhandled wave op %u.\n", op); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, ++ "Wave operation %u is unhandled.\n", op); ++ return VKD3DSIH_INVALID; ++ } ++} ++ ++static void sm6_parser_emit_dx_wave_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ enum vkd3d_shader_opcode opcode; ++ enum dxil_wave_op_kind wave_op; ++ bool is_signed; ++ ++ wave_op = sm6_value_get_constant_uint(operands[1]); ++ is_signed = !sm6_value_get_constant_uint(operands[2]); ++ opcode = sm6_dx_map_wave_op(wave_op, is_signed, sm6_type_is_floating_point(operands[0]->type), sm6); ++ ++ if (opcode == VKD3DSIH_INVALID) ++ return; ++ ++ vsir_instruction_init(ins, &sm6->p.location, opcode); ++ ins->flags = (op == DX_WAVE_PREFIX_OP) ? 
VKD3DSI_WAVE_PREFIX : 0; ++ ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ src_param_init_from_value(src_param, operands[0]); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ ++static void sm6_parser_emit_dx_wave_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ enum vkd3d_shader_register_type type; ++ ++ switch (op) ++ { ++ case DX_WAVE_GET_LANE_COUNT: ++ type = VKD3DSPR_WAVELANECOUNT; ++ break; ++ case DX_WAVE_GET_LANE_INDEX: ++ type = VKD3DSPR_WAVELANEINDEX; ++ break; ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, type, VKD3D_DATA_UINT); ++} ++ + struct sm6_dx_opcode_info + { + const char *ret_type; +@@ -4723,6 +6154,7 @@ struct sm6_dx_opcode_info + C -> constant or undefined int8/16/32 + i -> int32 + m -> int16/32/64 ++ n -> any numeric + f -> float + d -> double + e -> half/float +@@ -4730,6 +6162,7 @@ struct sm6_dx_opcode_info + H -> handle + D -> Dimensions + S -> splitdouble ++ V -> 4 x i32 + v -> void + o -> overloaded + R -> matches the return type +@@ -4741,29 +6174,47 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_ATAN ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_ATOMIC_BINOP ] = {"o", "HciiiR", sm6_parser_emit_dx_atomic_binop}, [DX_ATOMIC_CMP_XCHG ] = {"o", "HiiiRR", sm6_parser_emit_dx_atomic_binop}, @@ -4216,14 +5732,20 @@ index 26a8a5c1cc3..220ba773887 100644 [DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, -@@ -4752,18 +5735,28 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = ++ [DX_COVERAGE ] = {"i", "", sm6_parser_emit_dx_coverage}, + [DX_CREATE_HANDLE ] = {"H", "ccib", sm6_parser_emit_dx_create_handle}, ++ [DX_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, + [DX_DERIV_COARSEX ] = {"e", 
"R", sm6_parser_emit_dx_unary}, [DX_DERIV_COARSEY ] = {"e", "R", sm6_parser_emit_dx_unary}, [DX_DERIV_FINEX ] = {"e", "R", sm6_parser_emit_dx_unary}, [DX_DERIV_FINEY ] = {"e", "R", sm6_parser_emit_dx_unary}, + [DX_DISCARD ] = {"v", "1", sm6_parser_emit_dx_discard}, ++ [DX_DOMAIN_LOCATION ] = {"f", "c", sm6_parser_emit_dx_domain_location}, + [DX_DOT2 ] = {"g", "RRRR", sm6_parser_emit_dx_dot}, + [DX_DOT3 ] = {"g", "RRRRRR", sm6_parser_emit_dx_dot}, + [DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot}, ++ [DX_EMIT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, ++ [DX_EMIT_THEN_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, @@ -4245,16 +5767,20 @@ index 26a8a5c1cc3..220ba773887 100644 [DX_IMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, [DX_IMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, [DX_ISFINITE ] = {"1", "g", sm6_parser_emit_dx_unary}, -@@ -4773,6 +5766,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = +@@ -4772,7 +6223,12 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_LEGACY_F16TOF32 ] = {"f", "i", sm6_parser_emit_dx_unary}, [DX_LEGACY_F32TOF16 ] = {"i", "f", sm6_parser_emit_dx_unary}, [DX_LOAD_INPUT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, ++ [DX_LOAD_OUTPUT_CONTROL_POINT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, ++ [DX_LOAD_PATCH_CONSTANT ] = {"o", "ii8", sm6_parser_emit_dx_load_input}, [DX_LOG ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, ++ [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, + [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, [DX_ROUND_NE ] = {"g", 
"R", sm6_parser_emit_dx_unary}, -@@ -4780,20 +5775,29 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = +@@ -4780,22 +6236,46 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_ROUND_PI ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_ROUND_Z ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_RSQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, @@ -4271,6 +5797,7 @@ index 26a8a5c1cc3..220ba773887 100644 [DX_SPLIT_DOUBLE ] = {"S", "d", sm6_parser_emit_dx_split_double}, [DX_SQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_STORE_OUTPUT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, ++ [DX_STORE_PATCH_CONSTANT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, [DX_TAN ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_TEX2DMS_GET_SAMPLE_POS ] = {"o", "Hi", sm6_parser_emit_dx_get_sample_pos}, + [DX_TEXTURE_GATHER ] = {"o", "HHffffiic", sm6_parser_emit_dx_texture_gather}, @@ -4283,8 +5810,42 @@ index 26a8a5c1cc3..220ba773887 100644 + [DX_UMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, [DX_UMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, ++ [DX_WAVE_ACTIVE_ALL_EQUAL ] = {"1", "n", sm6_parser_emit_dx_unary}, ++ [DX_WAVE_ACTIVE_BALLOT ] = {"V", "1", sm6_parser_emit_dx_wave_active_ballot}, ++ [DX_WAVE_ACTIVE_BIT ] = {"m", "Rc", sm6_parser_emit_dx_wave_active_bit}, ++ [DX_WAVE_ACTIVE_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, ++ [DX_WAVE_ALL_BIT_COUNT ] = {"i", "1", sm6_parser_emit_dx_unary}, ++ [DX_WAVE_ALL_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, ++ [DX_WAVE_ANY_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, ++ [DX_WAVE_GET_LANE_COUNT ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, ++ [DX_WAVE_GET_LANE_INDEX ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, ++ [DX_WAVE_IS_FIRST_LANE ] = {"1", "", sm6_parser_emit_dx_void}, ++ [DX_WAVE_PREFIX_BIT_COUNT ] = {"i", "1", sm6_parser_emit_dx_unary}, ++ [DX_WAVE_PREFIX_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, ++ [DX_WAVE_READ_LANE_AT ] = {"n", 
"Ri", sm6_parser_emit_dx_binary}, ++ [DX_WAVE_READ_LANE_FIRST ] = {"n", "R", sm6_parser_emit_dx_unary}, }; -@@ -5055,7 +6059,10 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ + + static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_value *value, char info_type, +@@ -4827,6 +6307,8 @@ static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struc + return sm6_type_is_i32(type); + case 'm': + return sm6_type_is_i16_i32_i64(type); ++ case 'n': ++ return sm6_type_is_numeric(type); + case 'f': + return sm6_type_is_float(type); + case 'd': +@@ -4841,6 +6323,8 @@ static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struc + return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.Dimensions"); + case 'S': + return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.splitdouble"); ++ case 'V': ++ return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.fouri32"); + case 'v': + return !type; + case 'o': +@@ -5055,7 +6539,10 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ break; case CAST_ZEXT: case CAST_SEXT: @@ -4296,7 +5857,7 @@ index 26a8a5c1cc3..220ba773887 100644 if (to->u.width == from->u.width || (to->u.width == 32 && from->u.width == 16)) { op = VKD3DSIH_NOP; -@@ -5187,8 +6194,8 @@ static const struct sm6_cmp_info *sm6_map_cmp2_op(uint64_t code) +@@ -5187,8 +6674,8 @@ static const struct sm6_cmp_info *sm6_map_cmp2_op(uint64_t code) [FCMP_OLT] = {VKD3DSIH_LTO}, [FCMP_OLE] = {VKD3DSIH_GEO, true}, [FCMP_ONE] = {VKD3DSIH_NEO}, @@ -4307,10 +5868,58 @@ index 26a8a5c1cc3..220ba773887 100644 [FCMP_UEQ] = {VKD3DSIH_EQU}, [FCMP_UGT] = {VKD3DSIH_LTU, true}, [FCMP_UGE] = {VKD3DSIH_GEU}, -@@ -5304,6 +6311,87 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor - instruction_dst_param_init_ssa_scalar(ins, sm6); - } +@@ -5248,6 +6735,15 @@ static void 
sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor + code = record->operands[i++]; + ++ /* dxcompiler occasionally emits bool not-equal-to-false, which is a no-op. Bool comparisons ++ * do not otherwise occur, so deleting these avoids the need for backend support. */ ++ if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) ++ { ++ ins->handler_idx = VKD3DSIH_NOP; ++ *dst = *a; ++ return; ++ } ++ + if ((!is_int && !is_fp) || is_int != (code >= ICMP_EQ)) + { + FIXME("Invalid operation %"PRIu64" on type class %u.\n", code, type_a->class); +@@ -5274,34 +6770,116 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor + + vsir_instruction_init(ins, &sm6->p.location, cmp->handler_idx); + +- flags = (record->operand_count > i) ? record->operands[i] : 0; +- silence_warning = false; ++ flags = (record->operand_count > i) ? record->operands[i] : 0; ++ silence_warning = false; ++ ++ if (is_fp) ++ { ++ if (!(flags & FP_ALLOW_UNSAFE_ALGEBRA)) ++ ins->flags |= VKD3DSI_PRECISE_X; ++ flags &= ~FP_ALLOW_UNSAFE_ALGEBRA; ++ /* SPIR-V FPFastMathMode is only available in the Kernel execution model. 
*/ ++ silence_warning = !(flags & ~(FP_NO_NAN | FP_NO_INF | FP_NO_SIGNED_ZEROS | FP_ALLOW_RECIPROCAL)); ++ } ++ if (flags && silence_warning) ++ { ++ TRACE("Ignoring fast FP modifier %#"PRIx64".\n", flags); ++ } ++ else if (flags) ++ { ++ WARN("Ignoring flags %#"PRIx64".\n", flags); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Ignoring flags %#"PRIx64" for a comparison operation.", flags); ++ } ++ ++ if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) ++ return; ++ src_param_init_from_value(&src_params[0 ^ cmp->src_swap], a); ++ src_param_init_from_value(&src_params[1 ^ cmp->src_swap], b); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ +static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_record *record, + struct vkd3d_shader_instruction *ins, struct sm6_value *dst) +{ @@ -4324,24 +5933,34 @@ index 26a8a5c1cc3..220ba773887 100644 + uint64_t code; + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) -+ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) ++ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6) ++ || !sm6_value_validate_is_backward_ref(ptr, sm6)) + return; -+ + +- if (is_fp) + if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) -+ { + { +- if (!(flags & FP_ALLOW_UNSAFE_ALGEBRA)) +- ins->flags |= VKD3DSI_PRECISE_X; +- flags &= ~FP_ALLOW_UNSAFE_ALGEBRA; +- /* SPIR-V FPFastMathMode is only available in the Kernel execution model. 
*/ +- silence_warning = !(flags & ~(FP_NO_NAN | FP_NO_INF | FP_NO_SIGNED_ZEROS | FP_ALLOW_RECIPROCAL)); + WARN("Register is not groupshared.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "The destination register for a cmpxchg instruction is not groupshared memory."); + return; -+ } + } +- if (flags && silence_warning) + + if (!(dst->type = sm6_type_get_cmpxchg_result_struct(sm6))) -+ { + { +- TRACE("Ignoring fast FP modifier %#"PRIx64".\n", flags); + WARN("Failed to find result struct.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "Module does not define a result struct type for a cmpxchg instruction."); + return; -+ } + } +- else if (flags) + + type = ptr->type->u.pointer.type; + cmp = sm6_parser_get_value_by_ref(sm6, record, type, &i); @@ -4352,10 +5971,14 @@ index 26a8a5c1cc3..220ba773887 100644 + if (!sm6_value_validate_is_i32(cmp, sm6) + || !sm6_value_validate_is_i32(new, sm6) + || !dxil_record_validate_operand_count(record, i + 3, i + 5, sm6)) -+ { + { +- WARN("Ignoring flags %#"PRIx64".\n", flags); +- vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, +- "Ignoring flags %#"PRIx64" for a comparison operation.", flags); + return; -+ } -+ + } + +- if (!(src_params = instruction_src_params_alloc(ins, 2, sm6))) + is_volatile = record->operands[i++]; + success_ordering = record->operands[i++]; + @@ -4377,11 +6000,14 @@ index 26a8a5c1cc3..220ba773887 100644 + ins->flags = is_volatile ? 
VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST; + + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -+ return; + return; +- src_param_init_from_value(&src_params[0 ^ cmp->src_swap], a); +- src_param_init_from_value(&src_params[1 ^ cmp->src_swap], b); + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[1], cmp); + src_param_init_from_value(&src_params[2], new); -+ + +- instruction_dst_param_init_ssa_scalar(ins, sm6); + if (!(dst_params = instruction_dst_params_alloc(ins, 2, sm6))) + return; + register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); @@ -4390,12 +6016,10 @@ index 26a8a5c1cc3..220ba773887 100644 + dst_param_init(&dst_params[1]); + + dst->u.reg = dst_params[0].reg; -+} -+ + } + static void sm6_parser_emit_extractval(struct sm6_parser *sm6, const struct dxil_record *record, - struct vkd3d_shader_instruction *ins, struct sm6_value *dst) - { -@@ -5459,6 +6547,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record +@@ -5459,6 +7037,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record register_index_address_init(®->idx[1], elem_value, sm6); reg->idx[1].is_in_bounds = is_in_bounds; reg->idx_count = 2; @@ -4403,7 +6027,7 @@ index 26a8a5c1cc3..220ba773887 100644 ins->handler_idx = VKD3DSIH_NOP; } -@@ -5467,8 +6556,8 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor +@@ -5467,8 +7046,8 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { const struct sm6_type *elem_type = NULL, *pointee_type; @@ -4414,7 +6038,15 @@ index 26a8a5c1cc3..220ba773887 100644 const struct sm6_value *ptr; uint64_t alignment_code; -@@ -5505,12 +6594,34 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor +@@ -5476,6 +7055,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct 
dxil_recor + return; + if (!sm6_value_validate_is_register(ptr, sm6) + || !sm6_value_validate_is_pointer(ptr, sm6) ++ || !sm6_value_validate_is_backward_ref(ptr, sm6) + || !dxil_record_validate_operand_count(record, i + 2, i + 3, sm6)) + return; + +@@ -5505,12 +7085,34 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor if (record->operands[i]) WARN("Ignoring volatile modifier.\n"); @@ -4454,7 +6086,7 @@ index 26a8a5c1cc3..220ba773887 100644 instruction_dst_param_init_ssa_scalar(ins, sm6); } -@@ -5628,11 +6739,11 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record +@@ -5628,16 +7230,17 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { @@ -4468,7 +6100,14 @@ index 26a8a5c1cc3..220ba773887 100644 uint64_t alignment_code; if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) -@@ -5665,16 +6776,40 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco + || !sm6_value_validate_is_register(ptr, sm6) +- || !sm6_value_validate_is_pointer(ptr, sm6)) ++ || !sm6_value_validate_is_pointer(ptr, sm6) ++ || !sm6_value_validate_is_backward_ref(ptr, sm6)) + { + return; + } +@@ -5665,16 +7268,40 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco if (record->operands[i]) WARN("Ignoring volatile modifier.\n"); @@ -4513,7 +6152,7 @@ index 26a8a5c1cc3..220ba773887 100644 } static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -5855,6 +6990,25 @@ static bool sm6_metadata_get_uint64_value(const struct sm6_parser *sm6, +@@ -5855,6 +7482,25 @@ static bool sm6_metadata_get_uint64_value(const struct sm6_parser *sm6, return true; } @@ -4539,7 +6178,35 @@ index 26a8a5c1cc3..220ba773887 100644 static void 
sm6_parser_metadata_attachment_block_init(struct sm6_parser *sm6, const struct dxil_block *target_block, const struct dxil_block *block) { -@@ -6215,6 +7369,13 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const +@@ -6124,6 +7770,7 @@ static enum vkd3d_result sm6_function_resolve_phi_incomings(const struct sm6_fun + static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct dxil_block *block, + struct sm6_function *function) + { ++ struct vsir_program *program = sm6->p.program; + struct vkd3d_shader_instruction *ins; + size_t i, block_idx, block_count; + const struct dxil_record *record; +@@ -6132,11 +7779,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + struct sm6_block *code_block; + struct sm6_value *dst; + +- if (sm6->function_count) +- { +- FIXME("Multiple functions are not supported yet.\n"); +- return VKD3D_ERROR_INVALID_SHADER; +- } + if (!(function->declaration = sm6_parser_next_function_definition(sm6))) + { + WARN("Failed to find definition to match function body.\n"); +@@ -6207,6 +7849,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + fwd_type = dst->type; + dst->type = NULL; + dst->value_type = VALUE_TYPE_REG; ++ dst->is_back_ref = true; + is_terminator = false; + + record = block->records[i]; +@@ -6215,6 +7858,13 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const case FUNC_CODE_INST_ALLOCA: sm6_parser_emit_alloca(sm6, record, ins, dst); break; @@ -4547,13 +6214,22 @@ index 26a8a5c1cc3..220ba773887 100644 + { + struct function_emission_state state = {code_block, ins}; + sm6_parser_emit_atomicrmw(sm6, record, &state, dst); -+ sm6->p.program.temp_count = max(sm6->p.program.temp_count, state.temp_idx); ++ program->temp_count = max(program->temp_count, state.temp_idx); + break; + } case FUNC_CODE_INST_BINOP: sm6_parser_emit_binop(sm6, record, ins, dst); break; -@@ -6235,6 +7396,9 @@ static 
enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const +@@ -6226,7 +7876,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + { + struct function_emission_state state = {code_block, ins}; + sm6_parser_emit_call(sm6, record, &state, dst); +- sm6->p.program.temp_count = max(sm6->p.program.temp_count, state.temp_idx); ++ program->temp_count = max(program->temp_count, state.temp_idx); + break; + } + case FUNC_CODE_INST_CAST: +@@ -6235,6 +7885,9 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const case FUNC_CODE_INST_CMP2: sm6_parser_emit_cmp2(sm6, record, ins, dst); break; @@ -4563,9 +6239,41 @@ index 26a8a5c1cc3..220ba773887 100644 case FUNC_CODE_INST_EXTRACTVAL: sm6_parser_emit_extractval(sm6, record, ins, dst); break; -@@ -6803,11 +7967,45 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = +@@ -6497,9 +8150,10 @@ static void sm6_parser_emit_label(struct sm6_parser *sm6, unsigned int label_id) + + static enum vkd3d_result sm6_function_emit_blocks(const struct sm6_function *function, struct sm6_parser *sm6) + { ++ struct vsir_program *program = sm6->p.program; + unsigned int i; + +- sm6->p.program.block_count = function->block_count; ++ program->block_count = function->block_count; + + for (i = 0; i < function->block_count; ++i) + { +@@ -6515,9 +8169,9 @@ static enum vkd3d_result sm6_function_emit_blocks(const struct sm6_function *fun + sm6_parser_emit_label(sm6, block->id); + sm6_block_emit_phi(block, sm6); + +- memcpy(&sm6->p.program.instructions.elements[sm6->p.program.instructions.count], block->instructions, ++ memcpy(&program->instructions.elements[program->instructions.count], block->instructions, + block->instruction_count * sizeof(*block->instructions)); +- sm6->p.program.instructions.count += block->instruction_count; ++ program->instructions.count += block->instruction_count; + + sm6_block_emit_terminator(block, sm6); + } +@@ -6800,14 +8454,53 @@ 
static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = + { + [SEMANTIC_KIND_ARBITRARY] = VKD3D_SHADER_SV_NONE, + [SEMANTIC_KIND_VERTEXID] = VKD3D_SHADER_SV_VERTEX_ID, ++ [SEMANTIC_KIND_INSTANCEID] = VKD3D_SHADER_SV_INSTANCE_ID, [SEMANTIC_KIND_POSITION] = VKD3D_SHADER_SV_POSITION, ++ [SEMANTIC_KIND_CLIPDISTANCE] = VKD3D_SHADER_SV_CLIP_DISTANCE, ++ [SEMANTIC_KIND_CULLDISTANCE] = VKD3D_SHADER_SV_CULL_DISTANCE, ++ [SEMANTIC_KIND_PRIMITIVEID] = VKD3D_SHADER_SV_PRIMITIVE_ID, [SEMANTIC_KIND_ISFRONTFACE] = VKD3D_SHADER_SV_IS_FRONT_FACE, ++ [SEMANTIC_KIND_COVERAGE] = VKD3D_SHADER_SV_COVERAGE, [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, + [SEMANTIC_KIND_DEPTH] = VKD3D_SHADER_SV_DEPTH, + [SEMANTIC_KIND_DEPTHLESSEQUAL] = VKD3D_SHADER_SV_DEPTH_LESS_EQUAL, @@ -4611,7 +6319,16 @@ index 26a8a5c1cc3..220ba773887 100644 { return sysval_semantic_table[kind]; } -@@ -7563,12 +8761,13 @@ static void signature_element_read_additional_element_values(struct signature_el +@@ -7448,7 +9141,7 @@ static enum vkd3d_result sm6_parser_descriptor_type_init(struct sm6_parser *sm6, + } + + ++sm6->descriptor_count; +- ++sm6->p.program.instructions.count; ++ ++sm6->p.program->instructions.count; + } + + return VKD3D_OK; +@@ -7563,12 +9256,13 @@ static void signature_element_read_additional_element_values(struct signature_el } static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, @@ -4626,7 +6343,7 @@ index 26a8a5c1cc3..220ba773887 100644 if (!m) return VKD3D_OK; -@@ -7656,7 +8855,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -7656,7 +9350,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const e->min_precision = minimum_precision_from_dxil_component_type(values[2]); j = values[3]; @@ -4635,7 +6352,7 @@ index 26a8a5c1cc3..220ba773887 100644 if (j != SEMANTIC_KIND_ARBITRARY && j != SEMANTIC_KIND_TARGET && e->sysval_semantic == VKD3D_SHADER_SV_NONE) { 
WARN("Unhandled semantic kind %u.\n", j); -@@ -7677,7 +8876,18 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -7677,7 +9371,18 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const column_count = values[7]; e->register_index = values[8]; e->target_location = e->register_index; @@ -4655,7 +6372,7 @@ index 26a8a5c1cc3..220ba773887 100644 { WARN("Invalid row start %u with row count %u.\n", e->register_index, e->register_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, -@@ -7685,8 +8895,9 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -7685,8 +9390,9 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const e->register_index, e->register_count); return VKD3D_ERROR_INVALID_SHADER; } @@ -4666,7 +6383,7 @@ index 26a8a5c1cc3..220ba773887 100644 { WARN("Invalid column start %u with count %u.\n", index, column_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, -@@ -7696,10 +8907,13 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -7696,10 +9402,17 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const e->mask = vkd3d_write_mask_from_component_count(column_count); e->used_mask = e->mask; @@ -4680,47 +6397,146 @@ index 26a8a5c1cc3..220ba773887 100644 + e->mask <<= index; + e->used_mask <<= index; + } ++ ++ /* DXIL reads/writes uint for bool I/O. 
*/ ++ if (e->component_type == VKD3D_SHADER_COMPONENT_BOOL) ++ e->component_type = VKD3D_SHADER_COMPONENT_UINT; m = element_node->operands[4]; if (!sm6_metadata_value_is_node(m)) -@@ -7739,7 +8953,8 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - return VKD3D_OK; - } +@@ -7732,128 +9445,476 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const + } + } --static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m) +- vkd3d_free(s->elements); +- s->elements = elements; +- s->element_count = operand_count; ++ vkd3d_free(s->elements); ++ s->elements = elements; ++ s->element_count = operand_count; ++ ++ return VKD3D_OK; ++} ++ +static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m, + enum vkd3d_tessellator_domain tessellator_domain) - { - enum vkd3d_result ret; - -@@ -7752,19 +8967,19 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons - } - - if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], -- &sm6->p.shader_desc.input_signature)) < 0) -+ &sm6->p.program.input_signature, tessellator_domain)) < 0) - { - return ret; - } - if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], -- &sm6->p.shader_desc.output_signature)) < 0) -+ &sm6->p.program.output_signature, tessellator_domain)) < 0) - { - return ret; - } - /* TODO: patch constant signature in operand 2. 
*/ - -- sm6_parser_init_input_signature(sm6, &sm6->p.shader_desc.input_signature); -- sm6_parser_init_output_signature(sm6, &sm6->p.shader_desc.output_signature); -+ sm6_parser_init_input_signature(sm6, &sm6->p.program.input_signature); -+ sm6_parser_init_output_signature(sm6, &sm6->p.program.output_signature); - - return VKD3D_OK; - } -@@ -7850,10 +9065,216 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co - return VKD3D_OK; - } - ++{ ++ struct vsir_program *program = sm6->p.program; ++ enum vkd3d_result ret; ++ ++ if (!sm6_metadata_value_is_node(m)) ++ { ++ WARN("Signature table is not a node.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, ++ "Signature table is not a metadata node."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], ++ &program->input_signature, tessellator_domain)) < 0) ++ { ++ return ret; ++ } ++ if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], ++ &program->output_signature, tessellator_domain)) < 0) ++ { ++ return ret; ++ } ++ if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], ++ &program->patch_constant_signature, tessellator_domain)) < 0) ++ { ++ return ret; ++ } ++ ++ sm6_parser_init_input_signature(sm6, &program->input_signature); ++ sm6_parser_init_output_signature(sm6, &program->output_signature); ++ sm6_parser_init_patch_constant_signature(sm6, &program->patch_constant_signature); ++ ++ return VKD3D_OK; ++} ++ ++static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm6_metadata_value *m) ++{ ++ enum vkd3d_shader_global_flags global_flags, mask, rotated_flags; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!sm6_metadata_get_uint64_value(sm6, m, (uint64_t*)&global_flags)) ++ { ++ WARN("Failed to load global flags.\n"); ++ 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Global flags metadata value is not an integer."); ++ return; ++ } ++ /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vkd3d_shader_global_flags. */ ++ mask = (VKD3DSGF_SKIP_OPTIMIZATION << 1) - 1; ++ rotated_flags = global_flags & mask; ++ rotated_flags = (rotated_flags >> 1) | ((rotated_flags & 1) << 4); ++ global_flags = (global_flags & ~mask) | rotated_flags; ++ ++ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_GLOBAL_FLAGS); ++ ins->declaration.global_flags = global_flags; ++} ++ ++static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, const struct sm6_metadata_value *m) ++{ ++ struct vkd3d_shader_version *version = &sm6->p.program->shader_version; ++ const struct sm6_metadata_node *node; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int group_sizes[3]; ++ unsigned int i; ++ ++ if (version->type != VKD3D_SHADER_TYPE_COMPUTE) ++ { ++ WARN("Shader of type %#x has thread group dimensions.\n", version->type); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Shader has thread group dimensions but is not a compute shader."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!m || !sm6_metadata_value_is_node(m)) ++ { ++ WARN("Thread group dimension value is not a node.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Thread group dimension metadata value is not a node."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ node = m->u.node; ++ if (node->operand_count != 3) ++ { ++ WARN("Invalid operand count %u.\n", node->operand_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, ++ "Thread group dimension operand count %u is invalid.", node->operand_count); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ for (i = 0; i < 3; ++i) ++ { ++ if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &group_sizes[i])) ++ { ++ 
WARN("Thread group dimension is not an integer value.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Thread group dimension metadata value is not an integer."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (!group_sizes[i] || group_sizes[i] > dx_max_thread_group_size[i]) ++ { ++ char dim = "XYZ"[i]; ++ WARN("Invalid thread group %c dimension %u.\n", dim, group_sizes[i]); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Thread group %c dimension %u is invalid.", dim, group_sizes[i]); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ } ++ ++ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_THREAD_GROUP); ++ ins->declaration.thread_group_size.x = group_sizes[0]; ++ ins->declaration.thread_group_size.y = group_sizes[1]; ++ ins->declaration.thread_group_size.z = group_sizes[2]; ++ ++ return VKD3D_OK; ++} ++ +static void sm6_parser_emit_dcl_count(struct sm6_parser *sm6, enum vkd3d_shader_opcode handler_idx, unsigned int count) +{ + struct vkd3d_shader_instruction *ins; @@ -4729,6 +6545,17 @@ index 26a8a5c1cc3..220ba773887 100644 + ins->declaration.count = count; +} + ++static void sm6_parser_emit_dcl_primitive_topology(struct sm6_parser *sm6, ++ enum vkd3d_shader_opcode handler_idx, enum vkd3d_primitive_type primitive_type, ++ unsigned int patch_vertex_count) ++{ ++ struct vkd3d_shader_instruction *ins; ++ ++ ins = sm6_parser_add_instruction(sm6, handler_idx); ++ ins->declaration.primitive_type.type = primitive_type; ++ ins->declaration.primitive_type.patch_vertex_count = patch_vertex_count; ++} ++ +static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, + enum vkd3d_tessellator_domain tessellator_domain) +{ @@ -4760,7 +6587,8 @@ index 26a8a5c1cc3..220ba773887 100644 + enum vkd3d_shader_tessellator_partitioning tessellator_partitioning) +{ + struct vkd3d_shader_instruction *ins; -+ + +- return VKD3D_OK; + if (!tessellator_partitioning || tessellator_partitioning > 
VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) + { + WARN("Unhandled partitioning %u.\n", tessellator_partitioning); @@ -4770,20 +6598,29 @@ index 26a8a5c1cc3..220ba773887 100644 + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING); + ins->declaration.tessellator_partitioning = tessellator_partitioning; -+} -+ + } + +-static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m) +static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *sm6, + enum vkd3d_shader_tessellator_output_primitive primitive) -+{ + { +- enum vkd3d_result ret; + struct vkd3d_shader_instruction *ins; -+ + +- if (!sm6_metadata_value_is_node(m)) + if (!primitive || primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) -+ { + { +- WARN("Signature table is not a node.\n"); +- vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, +- "Signature table is not a metadata node."); +- return VKD3D_ERROR_INVALID_SHADER; + WARN("Unhandled output primitive %u.\n", primitive); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Hull shader tessellator output primitive %u is unhandled.", primitive); -+ } -+ + } + +- if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], +- &sm6->p.shader_desc.input_signature)) < 0) + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE); + ins->declaration.tessellator_output_primitive = primitive; +} @@ -4794,36 +6631,187 @@ index 26a8a5c1cc3..220ba773887 100644 + float max_tessellation_factor; + + if (!sm6_metadata_get_float_value(sm6, m, &max_tessellation_factor)) -+ { + { +- return ret; + WARN("Max tess factor property is not a float value.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Hull shader max tessellation factor property operand is not a float."); + return; -+ } + } +- if 
(m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], +- &sm6->p.shader_desc.output_signature)) < 0) + + /* Exclude non-finite values. */ + if (!(max_tessellation_factor >= 1.0f && max_tessellation_factor <= 64.0f)) -+ { + { +- return ret; + WARN("Invalid max tess factor %f.\n", max_tessellation_factor); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Hull shader max tessellation factor %f is invalid.", max_tessellation_factor); -+ } -+ + } +- /* TODO: patch constant signature in operand 2. */ +- +- sm6_parser_init_input_signature(sm6, &sm6->p.shader_desc.input_signature); +- sm6_parser_init_output_signature(sm6, &sm6->p.shader_desc.output_signature); + +- return VKD3D_OK; + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_HS_MAX_TESSFACTOR); + ins->declaration.max_tessellation_factor = max_tessellation_factor; -+} + } + +-static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm6_metadata_value *m) ++static void sm6_parser_gs_properties_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m) + { +- enum vkd3d_shader_global_flags global_flags, mask, rotated_flags; +- struct vkd3d_shader_instruction *ins; ++ enum vkd3d_primitive_type input_primitive = VKD3D_PT_TRIANGLELIST, output_primitive; ++ unsigned int i, input_control_point_count = 1, patch_vertex_count = 0; ++ const struct sm6_metadata_node *node; ++ unsigned int operands[5] = {0}; + +- if (!sm6_metadata_get_uint64_value(sm6, m, (uint64_t*)&global_flags)) ++ if (!m || !sm6_metadata_value_is_node(m)) + { +- WARN("Failed to load global flags.\n"); ++ WARN("Missing or invalid GS properties.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, +- "Global flags metadata value is not an integer."); ++ "Geometry shader properties node is missing or invalid."); + return; + } +- /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vkd3d_shader_global_flags. 
*/ +- mask = (VKD3DSGF_SKIP_OPTIMIZATION << 1) - 1; +- rotated_flags = global_flags & mask; +- rotated_flags = (rotated_flags >> 1) | ((rotated_flags & 1) << 4); +- global_flags = (global_flags & ~mask) | rotated_flags; + +- ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_GLOBAL_FLAGS); +- ins->declaration.global_flags = global_flags; ++ node = m->u.node; ++ if (node->operand_count < ARRAY_SIZE(operands)) ++ { ++ WARN("Invalid operand count %u.\n", node->operand_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, ++ "Geometry shader properties operand count %u is invalid.", node->operand_count); ++ return; ++ } ++ if (node->operand_count > ARRAY_SIZE(operands)) ++ { ++ WARN("Ignoring %zu extra operands.\n", node->operand_count - ARRAY_SIZE(operands)); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Ignoring %zu extra operands for geometry shader properties.", ++ node->operand_count - ARRAY_SIZE(operands)); ++ } + ++ for (i = 0; i < node->operand_count; ++i) ++ { ++ if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) ++ { ++ WARN("GS property at index %u is not a uint value.\n", i); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Geometry shader properties operand at index %u is not an integer.", i); ++ } ++ } ++ ++ switch (i = operands[0]) ++ { ++ case INPUT_PRIMITIVE_POINT: ++ input_primitive = VKD3D_PT_POINTLIST; ++ input_control_point_count = 1; ++ break; ++ ++ case INPUT_PRIMITIVE_LINE: ++ input_primitive = VKD3D_PT_LINELIST; ++ input_control_point_count = 2; ++ break; ++ ++ case INPUT_PRIMITIVE_TRIANGLE: ++ input_primitive = VKD3D_PT_TRIANGLELIST; ++ input_control_point_count = 3; ++ break; ++ ++ case INPUT_PRIMITIVE_LINEWITHADJACENCY: ++ input_primitive = VKD3D_PT_LINELIST_ADJ; ++ input_control_point_count = 4; ++ break; ++ ++ case INPUT_PRIMITIVE_TRIANGLEWITHADJACENY: ++ input_primitive = VKD3D_PT_TRIANGLELIST_ADJ; ++ 
input_control_point_count = 6; ++ break; ++ ++ default: ++ if (i >= INPUT_PRIMITIVE_PATCH1 && i <= INPUT_PRIMITIVE_PATCH32) ++ { ++ input_primitive = VKD3D_PT_PATCH; ++ patch_vertex_count = i - INPUT_PRIMITIVE_PATCH1 + 1; ++ break; ++ } ++ ++ WARN("Unhandled input primitive %u.\n", i); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Geometry shader input primitive %u is unhandled.", i); ++ break; ++ } ++ ++ sm6_parser_emit_dcl_primitive_topology(sm6, VKD3DSIH_DCL_INPUT_PRIMITIVE, input_primitive, patch_vertex_count); ++ sm6->p.program->input_control_point_count = input_control_point_count; ++ ++ i = operands[1]; ++ /* Max total scalar count sets an upper limit. We would need to scan outputs to be more precise. */ ++ if (i > MAX_GS_OUTPUT_TOTAL_SCALARS) ++ { ++ WARN("GS output vertex count %u invalid.\n", i); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Geometry shader output vertex count %u is invalid.", i); ++ } ++ sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_VERTICES_OUT, i); ++ ++ if (operands[2] > 1) ++ { ++ FIXME("Unhandled stream mask %#x.\n", operands[2]); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Geometry shader stream mask %#x is unhandled.", operands[2]); ++ } ++ ++ output_primitive = operands[3]; ++ if (output_primitive == VKD3D_PT_UNDEFINED || output_primitive >= VKD3D_PT_COUNT) ++ { ++ WARN("Unhandled output primitive %u.\n", output_primitive); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Geometry shader output primitive %u is unhandled.", output_primitive); ++ output_primitive = VKD3D_PT_TRIANGLELIST; ++ } ++ sm6_parser_emit_dcl_primitive_topology(sm6, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, output_primitive, 0); ++ ++ i = operands[4]; ++ if (!i || i > MAX_GS_INSTANCE_COUNT) ++ { ++ WARN("GS instance count %u invalid.\n", i); ++ vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, ++ "Geometry shader instance count %u is invalid.", i); ++ } ++ sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_GS_INSTANCES, i); + } + +-static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, const struct sm6_metadata_value *m) +static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_parser *sm6, + const struct sm6_metadata_value *m) -+{ -+ const struct sm6_metadata_node *node; + { + const struct sm6_metadata_node *node; +- struct vkd3d_shader_instruction *ins; +- unsigned int group_sizes[3]; + unsigned int operands[2] = {0}; -+ unsigned int i; -+ + unsigned int i; + +- if (sm6->p.program.shader_version.type != VKD3D_SHADER_TYPE_COMPUTE) + if (!m || !sm6_metadata_value_is_node(m)) -+ { + { +- WARN("Shader of type %#x has thread group dimensions.\n", sm6->p.program.shader_version.type); + WARN("Missing or invalid DS properties.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, +- "Shader has thread group dimensions but is not a compute shader."); +- return VKD3D_ERROR_INVALID_SHADER; + "Domain shader properties node is missing or invalid."); + return 0; + } @@ -4852,11 +6840,11 @@ index 26a8a5c1cc3..220ba773887 100644 + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Domain shader properties operand at index %u is not an integer.", i); + } -+ } -+ + } + + sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); + sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); -+ sm6->p.program.input_control_point_count = operands[1]; ++ sm6->p.program->input_control_point_count = operands[1]; + + return operands[0]; +} @@ -4864,23 +6852,30 @@ index 26a8a5c1cc3..220ba773887 100644 +static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_parser *sm6, + const struct sm6_metadata_value *m) +{ ++ 
struct vsir_program *program = sm6->p.program; + const struct sm6_metadata_node *node; + unsigned int operands[6] = {0}; + unsigned int i; + -+ if (!m || !sm6_metadata_value_is_node(m)) -+ { + if (!m || !sm6_metadata_value_is_node(m)) + { +- WARN("Thread group dimension value is not a node.\n"); + WARN("Missing or invalid HS properties.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, +- "Thread group dimension metadata value is not a node."); +- return VKD3D_ERROR_INVALID_SHADER; + "Hull shader properties node is missing or invalid."); + return 0; -+ } -+ -+ node = m->u.node; + } + + node = m->u.node; +- if (node->operand_count != 3) + if (node->operand_count < 7) -+ { -+ WARN("Invalid operand count %u.\n", node->operand_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, + { + WARN("Invalid operand count %u.\n", node->operand_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, +- "Thread group dimension operand count %u is invalid.", node->operand_count); +- return VKD3D_ERROR_INVALID_SHADER; + "Hull shader properties operand count %u is invalid.", node->operand_count); + return 0; + } @@ -4889,11 +6884,20 @@ index 26a8a5c1cc3..220ba773887 100644 + WARN("Ignoring %u extra operands.\n", node->operand_count - 7); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Ignoring %u extra operands for hull shader properties.", node->operand_count - 7); -+ } -+ + } + +- for (i = 0; i < 3; ++i) + m = node->operands[0]; + if (!sm6_metadata_value_is_value(m) || !sm6_value_is_function_dcl(m->u.value)) -+ { + { +- if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &group_sizes[i])) +- { +- WARN("Thread group dimension is not an integer value.\n"); +- vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, +- "Thread 
group dimension metadata value is not an integer."); +- return VKD3D_ERROR_INVALID_SHADER; +- } +- if (!group_sizes[i] || group_sizes[i] > dx_max_thread_group_size[i]) + WARN("Patch constant function node is not a function value.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + "Hull shader patch constant function node is not a function value."); @@ -4906,26 +6910,35 @@ index 26a8a5c1cc3..220ba773887 100644 + for (i = 1; i < min(node->operand_count, ARRAY_SIZE(operands)); ++i) + { + if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) -+ { + { +- char dim = "XYZ"[i]; +- WARN("Invalid thread group %c dimension %u.\n", dim, group_sizes[i]); + WARN("HS property at index %u is not a uint value.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, +- "Thread group %c dimension %u is invalid.", dim, group_sizes[i]); +- return VKD3D_ERROR_INVALID_SHADER; + "Hull shader properties operand at index %u is not an integer.", i); -+ } -+ } -+ + } + } + +- ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_THREAD_GROUP); +- ins->declaration.thread_group_size.x = group_sizes[0]; +- ins->declaration.thread_group_size.y = group_sizes[1]; +- ins->declaration.thread_group_size.z = group_sizes[2]; + sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); -+ sm6->p.program.input_control_point_count = operands[1]; ++ program->input_control_point_count = operands[1]; + sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); + sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); -+ sm6->p.program.output_control_point_count = operands[2]; ++ program->output_control_point_count = operands[2]; + sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); + sm6_parser_emit_dcl_tessellator_partitioning(sm6, operands[4]); + 
sm6_parser_emit_dcl_tessellator_output_primitive(sm6, operands[5]); + sm6_parser_emit_dcl_max_tessellation_factor(sm6, node->operands[6]); -+ + +- return VKD3D_OK; + return operands[3]; -+} -+ + } + static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) { const struct sm6_metadata_value *m = sm6_parser_find_named_metadata(sm6, "dx.entryPoints"); @@ -4934,7 +6947,7 @@ index 26a8a5c1cc3..220ba773887 100644 unsigned int i, operand_count, tag; const struct sm6_value *value; enum vkd3d_result ret; -@@ -7892,12 +9313,6 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) +@@ -7892,12 +9953,6 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) "Entry point function name %s does not match the name in metadata.", sm6->entry_point); } @@ -4947,10 +6960,13 @@ index 26a8a5c1cc3..220ba773887 100644 if (entry_node->operand_count >= 5 && (m = entry_node->operands[4])) { if (!sm6_metadata_value_is_node(m)) -@@ -7932,6 +9347,12 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) +@@ -7932,6 +9987,15 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) case SHADER_PROPERTIES_FLAGS: sm6_parser_emit_global_flags(sm6, node->operands[i + 1]); break; ++ case SHADER_PROPERTIES_GEOMETRY: ++ sm6_parser_gs_properties_init(sm6, node->operands[i + 1]); ++ break; + case SHADER_PROPERTIES_DOMAIN: + tessellator_domain = sm6_parser_ds_properties_init(sm6, node->operands[i + 1]); + break; @@ -4960,7 +6976,7 @@ index 26a8a5c1cc3..220ba773887 100644 case SHADER_PROPERTIES_COMPUTE: if ((ret = sm6_parser_emit_thread_group(sm6, node->operands[i + 1])) < 0) return ret; -@@ -7945,6 +9366,12 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) +@@ -7945,6 +10009,12 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) } } @@ -4973,28 +6989,49 @@ index 26a8a5c1cc3..220ba773887 100644 return VKD3D_OK; } -@@ -8062,7 +9489,6 @@ 
static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) +@@ -8049,28 +10119,18 @@ static void sm6_functions_cleanup(struct sm6_function *functions, size_t count) + vkd3d_free(functions); + } + +-static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) ++static void sm6_parser_cleanup(struct sm6_parser *sm6) + { +- struct sm6_parser *sm6 = sm6_parser(parser); +- + dxil_block_destroy(&sm6->root_block); + dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); +- vsir_program_cleanup(&parser->program); + sm6_type_table_cleanup(sm6->types, sm6->type_count); + sm6_symtab_cleanup(sm6->global_symbols, sm6->global_symbol_count); + sm6_functions_cleanup(sm6->functions, sm6->function_count); sm6_parser_metadata_cleanup(sm6); vkd3d_free(sm6->descriptors); vkd3d_free(sm6->values); - free_shader_desc(&parser->shader_desc); - vkd3d_free(sm6); +- vkd3d_free(sm6); } -@@ -8080,15 +9506,16 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 +-static const struct vkd3d_shader_parser_ops sm6_parser_ops = +-{ +- .parser_destroy = sm6_parser_destroy, +-}; +- + static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6, const char *name) + { + size_t i; +@@ -8080,15 +10140,15 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 return NULL; } -static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, - const char *source_name, struct vkd3d_shader_message_context *message_context) -+static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *source_name, ++static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, const char *source_name, + struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) { - const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; - const struct shader_signature *input_signature = 
&sm6->p.shader_desc.input_signature; -+ const struct shader_signature *output_signature = &sm6->p.program.output_signature; -+ const struct shader_signature *input_signature = &sm6->p.program.input_signature; -+ size_t count, length, function_count, byte_code_size = dxbc_desc->byte_code_size; ++ size_t count, length, function_count, expected_function_count, byte_code_size = dxbc_desc->byte_code_size; ++ struct shader_signature *patch_constant_signature, *output_signature, *input_signature; const struct vkd3d_shader_location location = {.source_name = source_name}; uint32_t version_token, dxil_version, token_count, magic; + const uint32_t *byte_code = dxbc_desc->byte_code; @@ -5003,31 +7040,242 @@ index 26a8a5c1cc3..220ba773887 100644 enum bitcode_block_abbreviation abbr; struct vkd3d_shader_version version; struct dxil_block *block; -@@ -8181,6 +9608,11 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t +@@ -8176,11 +10236,20 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + + /* Estimate instruction count to avoid reallocation in most shaders. 
*/ + count = max(token_count, 400) - 400; +- vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &sm6_parser_ops, +- (count + (count >> 2)) / 2u + 10); ++ if (!vsir_program_init(program, &version, (count + (count >> 2)) / 2u + 10)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ vkd3d_shader_parser_init(&sm6->p, program, message_context, source_name); sm6->ptr = &sm6->start[1]; sm6->bitpos = 2; -+ sm6->p.program.input_signature = dxbc_desc->input_signature; -+ sm6->p.program.output_signature = dxbc_desc->output_signature; -+ sm6->p.program.patch_constant_signature = dxbc_desc->patch_constant_signature; ++ input_signature = &program->input_signature; ++ output_signature = &program->output_signature; ++ patch_constant_signature = &program->patch_constant_signature; ++ *input_signature = dxbc_desc->input_signature; ++ *output_signature = dxbc_desc->output_signature; ++ *patch_constant_signature = dxbc_desc->patch_constant_signature; + memset(dxbc_desc, 0, sizeof(*dxbc_desc)); + block = &sm6->root_block; if ((ret = dxil_block_init(block, NULL, sm6)) < 0) { -@@ -8351,7 +9783,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +@@ -8192,7 +10261,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + "DXIL bitcode chunk has invalid bitcode."); + else + vkd3d_unreachable(); +- return ret; ++ goto fail; + } + + dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); +@@ -8225,7 +10294,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + "DXIL type table is invalid."); + else + vkd3d_unreachable(); +- return ret; ++ goto fail; + } + + if ((ret = sm6_parser_symtab_init(sm6)) < 0) +@@ -8238,16 +10307,19 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const 
uint32_t + "DXIL value symbol table is invalid."); + else + vkd3d_unreachable(); +- return ret; ++ goto fail; + } + +- if (!(sm6->output_params = vsir_program_get_dst_params(&sm6->p.program, output_signature->element_count)) +- || !(sm6->input_params = vsir_program_get_dst_params(&sm6->p.program, input_signature->element_count))) ++ if (!(sm6->output_params = vsir_program_get_dst_params(program, output_signature->element_count)) ++ || !(sm6->input_params = vsir_program_get_dst_params(program, input_signature->element_count)) ++ || !(sm6->patch_constant_params = vsir_program_get_dst_params(program, ++ patch_constant_signature->element_count))) + { + ERR("Failed to allocate input/output parameters.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating input/output parameters."); +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ goto fail; + } + + function_count = dxil_block_compute_function_count(&sm6->root_block); +@@ -8256,7 +10328,8 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + ERR("Failed to allocate function array.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating DXIL function array."); +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ goto fail; + } + + if (sm6_parser_compute_max_value_count(sm6, &sm6->root_block, 0) == SIZE_MAX) +@@ -8264,27 +10337,31 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + WARN("Value array count overflowed.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "Overflow occurred in the DXIL module value count."); +- return VKD3D_ERROR_INVALID_SHADER; ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ goto fail; + } + if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values)))) + { + ERR("Failed to allocate value array.\n"); + vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory allocating DXIL value array."); +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ goto fail; + } ++ sm6->function_count = 0; + sm6->ssa_next_id = 1; + + if ((ret = sm6_parser_globals_init(sm6)) < 0) + { + WARN("Failed to load global declarations.\n"); +- return ret; ++ goto fail; + } + + if (!sm6_parser_allocate_named_metadata(sm6)) + { + ERR("Failed to allocate named metadata array.\n"); +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ goto fail; + } + + for (i = 0, j = 0; i < sm6->root_block.child_block_count; ++i) +@@ -8298,18 +10375,19 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + FIXME("Too many metadata tables.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_METADATA, + "A metadata table count greater than %zu is unsupported.", ARRAY_SIZE(sm6->metadata_tables)); +- return VKD3D_ERROR_INVALID_SHADER; ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ goto fail; + } + + if ((ret = sm6_parser_metadata_init(sm6, block, &sm6->metadata_tables[j++])) < 0) +- return ret; ++ goto fail; + } + + if ((ret = sm6_parser_entry_point_init(sm6)) < 0) +- return ret; ++ goto fail; + + if ((ret = sm6_parser_resources_init(sm6)) < 0) +- return ret; ++ goto fail; + + if ((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) + { +@@ -8319,92 +10397,124 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + else if (ret == VKD3D_ERROR_INVALID_SHADER) + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "DXIL module is invalid."); +- return ret; ++ goto fail; + } + +- if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count)) ++ if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count ++ + patch_constant_signature->element_count)) + { + vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory emitting shader signature declarations."); +- return VKD3D_ERROR_OUT_OF_MEMORY; ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ goto fail; + } + +- sm6->p.program.ssa_count = sm6->ssa_next_id; ++ program->ssa_count = sm6->ssa_next_id; + + if (!(fn = sm6_parser_get_function(sm6, sm6->entry_point))) + { + WARN("Failed to find entry point %s.\n", sm6->entry_point); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_ENTRY_POINT, + "The definition of the entry point function '%s' was not found.", sm6->entry_point); +- return VKD3D_ERROR_INVALID_SHADER; ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ goto fail; + } + +- assert(sm6->function_count == 1); +- if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) +- return ret; ++ if (version.type == VKD3D_SHADER_TYPE_HULL) ++ { ++ sm6_parser_add_instruction(sm6, VKD3DSIH_HS_CONTROL_POINT_PHASE); ++ ++ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) ++ goto fail; ++ ++ if (!(fn = sm6_parser_get_function(sm6, sm6->patch_constant_function))) ++ { ++ WARN("Failed to find patch constant function '%s'.\n", sm6->patch_constant_function); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, ++ "Failed to find the patch constant function '%s' for a hull shader.", ++ sm6->patch_constant_function); ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ goto fail; ++ } ++ ++ sm6_parser_add_instruction(sm6, VKD3DSIH_HS_FORK_PHASE); ++ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) ++ goto fail; ++ ++ expected_function_count = 2; ++ } ++ else ++ { ++ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) ++ goto fail; ++ expected_function_count = 1; ++ } ++ ++ if (sm6->function_count > expected_function_count) ++ { ++ FIXME("%zu unhandled functions.\n", sm6->function_count - expected_function_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, ++ "%zu functions were not emitted.", sm6->function_count - expected_function_count); ++ } + + 
dxil_block_destroy(&sm6->root_block); + + return VKD3D_OK; ++ ++fail: ++ vsir_program_cleanup(program); ++ return ret; + } + +-int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) ++int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, ++ struct vkd3d_shader_message_context *message_context, struct vsir_program *program) { - struct vkd3d_shader_desc *shader_desc; + struct dxbc_shader_desc dxbc_desc = {0}; ++ struct sm6_parser sm6 = {0}; uint32_t *byte_code = NULL; - struct sm6_parser *sm6; +- struct sm6_parser *sm6; int ret; -@@ -8364,35 +9796,37 @@ int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compi - return VKD3D_ERROR_OUT_OF_MEMORY; - } + ERR("Creating a DXIL parser. This is unsupported; you get to keep all the pieces if it breaks.\n"); + +- if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6)))) +- { +- ERR("Failed to allocate parser.\n"); +- return VKD3D_ERROR_OUT_OF_MEMORY; +- } +- - shader_desc = &sm6->p.shader_desc; - shader_desc->is_dxil = true; + dxbc_desc.is_dxil = true; @@ -5036,7 +7284,7 @@ index 26a8a5c1cc3..220ba773887 100644 + &dxbc_desc)) < 0) { WARN("Failed to extract shader, vkd3d result %d.\n", ret); - vkd3d_free(sm6); +- vkd3d_free(sm6); return ret; } @@ -5056,7 +7304,6 @@ index 26a8a5c1cc3..220ba773887 100644 + { + ERR("Failed to allocate aligned chunk.\n"); + free_dxbc_shader_desc(&dxbc_desc); -+ vkd3d_free(sm6); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + @@ -5066,18 +7313,34 @@ index 26a8a5c1cc3..220ba773887 100644 - ret = sm6_parser_init(sm6, byte_code ? 
byte_code : shader_desc->byte_code, shader_desc->byte_code_size, - compile_info->source_name, message_context); -+ ret = sm6_parser_init(sm6, compile_info->source_name, message_context, &dxbc_desc); ++ ret = sm6_parser_init(&sm6, program, compile_info->source_name, message_context, &dxbc_desc); + free_dxbc_shader_desc(&dxbc_desc); vkd3d_free(byte_code); - if (!sm6->p.failed && ret >= 0) +- if (!sm6->p.failed && ret >= 0) - ret = vsir_validate(&sm6->p); -+ ret = vkd3d_shader_parser_validate(&sm6->p); ++ if (!sm6.p.failed && ret >= 0) ++ ret = vkd3d_shader_parser_validate(&sm6.p, config_flags); - if (sm6->p.failed && ret >= 0) +- if (sm6->p.failed && ret >= 0) ++ if (sm6.p.failed && ret >= 0) ret = VKD3D_ERROR_INVALID_SHADER; + ++ sm6_parser_cleanup(&sm6); + if (ret < 0) + { +- WARN("Failed to initialise shader parser.\n"); +- sm6_parser_destroy(&sm6->p); ++ WARN("Failed to parse shader.\n"); + return ret; + } + +- *parser = &sm6->p; +- + return ret; + } diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index bc70d5220fd..98443797543 100644 +index bc70d5220fd..57b4ac24212 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -61,9 +61,9 @@ struct fx_write_context; @@ -5091,13 +7354,22 @@ index bc70d5220fd..98443797543 100644 }; struct fx_write_context -@@ -84,8 +84,14 @@ struct fx_write_context +@@ -82,10 +82,23 @@ struct fx_write_context + uint32_t technique_count; + uint32_t group_count; uint32_t buffer_count; ++ uint32_t shared_buffer_count; uint32_t numeric_variable_count; ++ uint32_t shared_numeric_variable_count; uint32_t object_variable_count; + uint32_t shared_object_count; -+ uint32_t shader_variable_count; ++ uint32_t shader_count; + uint32_t parameter_count; ++ uint32_t dsv_count; ++ uint32_t rtv_count; ++ uint32_t texture_count; ++ uint32_t uav_count; ++ uint32_t sampler_state_count; int status; + bool child_effect; @@ -5106,7 +7378,7 @@ index bc70d5220fd..98443797543 100644 const 
struct fx_write_context_ops *ops; }; -@@ -97,6 +103,11 @@ static void set_status(struct fx_write_context *fx, int status) +@@ -97,6 +110,11 @@ static void set_status(struct fx_write_context *fx, int status) fx->status = status; } @@ -5118,7 +7390,7 @@ index bc70d5220fd..98443797543 100644 static uint32_t write_string(const char *string, struct fx_write_context *fx) { return fx->ops->write_string(string, fx); -@@ -104,15 +115,22 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) +@@ -104,26 +122,37 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) { @@ -5129,9 +7401,11 @@ index bc70d5220fd..98443797543 100644 } +static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); ++static const char * get_fx_4_type_name(const struct hlsl_type *type); + static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) { ++ const struct hlsl_type *element_type; struct type_entry *type_entry; unsigned int elements_count; const char *name; @@ -5140,8 +7414,23 @@ index bc70d5220fd..98443797543 100644 + if (type->class == HLSL_CLASS_ARRAY) { - name = hlsl_get_multiarray_element_type(type)->name; -@@ -138,7 +156,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context +- name = hlsl_get_multiarray_element_type(type)->name; + elements_count = hlsl_get_multiarray_size(type); ++ element_type = hlsl_get_multiarray_element_type(type); + } + else + { +- name = type->name; + elements_count = 0; ++ element_type = type; + } + ++ name = get_fx_4_type_name(element_type); ++ + LIST_FOR_EACH_ENTRY(type_entry, &fx->types, struct type_entry, entry) + { + if (strcmp(type_entry->name, name)) +@@ -138,7 +167,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context if (!(type_entry = hlsl_alloc(fx->ctx, sizeof(*type_entry)))) return 0; @@ -5150,15 +7439,15 
@@ index bc70d5220fd..98443797543 100644 type_entry->name = name; type_entry->elements_count = elements_count; -@@ -151,6 +169,7 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co +@@ -151,6 +180,7 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co struct fx_write_context *fx) { unsigned int version = ctx->profile->major_version; -+ struct hlsl_block block; ++ struct hlsl_ir_var *var; memset(fx, 0, sizeof(*fx)); -@@ -174,12 +193,19 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co +@@ -174,12 +204,25 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co rb_init(&fx->strings, string_storage_compare); list_init(&fx->types); @@ -5166,9 +7455,15 @@ index bc70d5220fd..98443797543 100644 + fx->child_effect = fx->ops->are_child_effects_supported && ctx->child_effect; + fx->include_empty_buffers = version == 4 && ctx->include_empty_buffers; + -+ hlsl_block_init(&block); -+ hlsl_prepend_global_uniform_copy(fx->ctx, &block); -+ hlsl_block_cleanup(&block); ++ LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) ++ { ++ list_add_tail(&ctx->extern_vars, &var->extern_entry); ++ var->is_uniform = 1; ++ } ++ } ++ + hlsl_calculate_buffer_offsets(fx->ctx); } @@ -5179,7 +7474,7 @@ index bc70d5220fd..98443797543 100644 rb_destroy(&fx->strings, string_storage_destroy, NULL); -@@ -189,7 +215,7 @@ static int fx_write_context_cleanup(struct fx_write_context *fx) +@@ -189,14 +232,14 @@ static int fx_write_context_cleanup(struct fx_write_context *fx) vkd3d_free(type); } @@ -5188,15 +7483,45 @@ index bc70d5220fd..98443797543 100644 } static bool technique_matches_version(const struct hlsl_ir_var *var, const struct fx_write_context *fx) -@@ -285,6 +311,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - [HLSL_TYPE_UINT ] = 3, - [HLSL_TYPE_BOOL ] = 
4, + { + const struct hlsl_type *type = var->data_type; + +- if (type->base_type != HLSL_TYPE_TECHNIQUE) ++ if (type->class != HLSL_CLASS_TECHNIQUE) + return false; + + return type->e.version >= fx->min_technique_version && type->e.version <= fx->max_technique_version; +@@ -266,6 +309,14 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type) + return type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float) * elements_count; + } + ++static const uint32_t fx_4_numeric_base_type[] = ++{ ++ [HLSL_TYPE_FLOAT] = 1, ++ [HLSL_TYPE_INT ] = 2, ++ [HLSL_TYPE_UINT ] = 3, ++ [HLSL_TYPE_BOOL ] = 4, ++}; ++ + static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, struct fx_write_context *fx) + { + static const unsigned int NUMERIC_BASE_TYPE_SHIFT = 3; +@@ -278,13 +329,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, + [HLSL_CLASS_VECTOR] = 2, + [HLSL_CLASS_MATRIX] = 3, }; +- static const uint32_t numeric_base_type[] = +- { +- [HLSL_TYPE_FLOAT] = 1, +- [HLSL_TYPE_INT ] = 2, +- [HLSL_TYPE_UINT ] = 3, +- [HLSL_TYPE_BOOL ] = 4, +- }; + struct hlsl_ctx *ctx = fx->ctx; uint32_t value = 0; switch (type->class) -@@ -295,8 +322,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, +@@ -295,22 +340,20 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, value |= numeric_type_class[type->class]; break; default: @@ -5206,17 +7531,24 @@ index bc70d5220fd..98443797543 100644 return 0; } -@@ -309,8 +335,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - value |= (numeric_base_type[type->base_type] << NUMERIC_BASE_TYPE_SHIFT); +- switch (type->base_type) ++ switch (type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: +- value |= (numeric_base_type[type->base_type] << NUMERIC_BASE_TYPE_SHIFT); ++ value |= (fx_4_numeric_base_type[type->e.numeric.type] << 
NUMERIC_BASE_TYPE_SHIFT); break; default: - FIXME("Unexpected base type %u.\n", type->base_type); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -+ hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->base_type); ++ hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->e.numeric.type); return 0; } -@@ -322,19 +347,14 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, +@@ -322,20 +365,8 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, return value; } @@ -5228,37 +7560,43 @@ index bc70d5220fd..98443797543 100644 - uint32_t elements_count = 0; - const char *name; - static const uint32_t variable_type[] = -+ static const char * const object_type_names[] = - { +- { - [HLSL_CLASS_SCALAR] = 1, - [HLSL_CLASS_VECTOR] = 1, - [HLSL_CLASS_MATRIX] = 1, - [HLSL_CLASS_OBJECT] = 2, - [HLSL_CLASS_STRUCT] = 3, -+ [HLSL_TYPE_PIXELSHADER] = "PixelShader", -+ [HLSL_TYPE_VERTEXSHADER] = "VertexShader", -+ [HLSL_TYPE_RENDERTARGETVIEW] = "RenderTargetView", -+ [HLSL_TYPE_DEPTHSTENCILVIEW] = "DepthStencilView", - }; +- }; static const char * const texture_type_names[] = { -@@ -360,6 +380,32 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + [HLSL_SAMPLER_DIM_GENERIC] = "texture", +@@ -360,6 +391,41 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer", }; -+ if (type->class == HLSL_CLASS_TEXTURE) -+ return texture_type_names[type->sampler_dim]; -+ -+ if (type->class == HLSL_CLASS_UAV) -+ return uav_type_names[type->sampler_dim]; -+ -+ switch (type->base_type) ++ switch (type->class) + { -+ case HLSL_TYPE_PIXELSHADER: -+ case HLSL_TYPE_VERTEXSHADER: -+ case HLSL_TYPE_RENDERTARGETVIEW: -+ case HLSL_TYPE_DEPTHSTENCILVIEW: -+ return object_type_names[type->base_type]; ++ case HLSL_CLASS_SAMPLER: ++ return "SamplerState"; ++ ++ case HLSL_CLASS_TEXTURE: ++ return 
texture_type_names[type->sampler_dim]; ++ ++ case HLSL_CLASS_UAV: ++ return uav_type_names[type->sampler_dim]; ++ ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ return "DepthStencilView"; ++ ++ case HLSL_CLASS_RENDER_TARGET_VIEW: ++ return "RenderTargetView"; ++ ++ case HLSL_CLASS_VERTEX_SHADER: ++ return "VertexShader"; ++ ++ case HLSL_CLASS_PIXEL_SHADER: ++ return "PixelShader"; ++ + default: + return type->name; + } @@ -5270,12 +7608,11 @@ index bc70d5220fd..98443797543 100644 + uint32_t name_offset, offset, size, stride, numeric_desc; + uint32_t elements_count = 0; + const char *name; -+ struct hlsl_ctx *ctx = fx->ctx; + /* Resolve arrays to element type and number of elements. */ if (type->class == HLSL_CLASS_ARRAY) { -@@ -367,12 +413,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -367,12 +433,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co type = hlsl_get_multiarray_element_type(type); } @@ -5289,16 +7626,21 @@ index bc70d5220fd..98443797543 100644 name_offset = write_string(name, fx); offset = put_u32_unaligned(buffer, name_offset); -@@ -382,11 +423,25 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -382,11 +443,31 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: case HLSL_CLASS_MATRIX: +- case HLSL_CLASS_OBJECT: + put_u32_unaligned(buffer, 1); + break; + - case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: ++ case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VERTEX_SHADER: + put_u32_unaligned(buffer, 2); + break; + @@ -5309,15 +7651,17 @@ index bc70d5220fd..98443797543 100644 - default: + + case HLSL_CLASS_ARRAY: ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_TECHNIQUE: + vkd3d_unreachable(); + -+ case 
HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_VOID: FIXME("Writing type class %u is not implemented.\n", type->class); set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); return 0; -@@ -422,13 +477,8 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -422,13 +503,8 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co put_u32_unaligned(buffer, field_type_offset); } } @@ -5332,56 +7676,86 @@ index bc70d5220fd..98443797543 100644 static const uint32_t texture_type[] = { [HLSL_SAMPLER_DIM_GENERIC] = 9, -@@ -442,6 +492,11 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -442,6 +518,15 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_CUBE] = 17, [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, }; + + put_u32_unaligned(buffer, texture_type[type->sampler_dim]); + } ++ else if (type->class == HLSL_CLASS_SAMPLER) ++ { ++ put_u32_unaligned(buffer, 21); ++ } + else if (type->class == HLSL_CLASS_UAV) + { static const uint32_t uav_type[] = { [HLSL_SAMPLER_DIM_1D] = 31, -@@ -453,21 +508,28 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -453,29 +538,35 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, }; -+ put_u32_unaligned(buffer, uav_type[type->sampler_dim]); -+ } -+ else if (type->class == HLSL_CLASS_OBJECT) -+ { -+ static const uint32_t object_type[] = -+ { -+ [HLSL_TYPE_PIXELSHADER] = 5, -+ [HLSL_TYPE_VERTEXSHADER] = 6, -+ [HLSL_TYPE_RENDERTARGETVIEW] = 19, -+ [HLSL_TYPE_DEPTHSTENCILVIEW] = 20, -+ }; -+ - switch (type->base_type) - { - case HLSL_TYPE_DEPTHSTENCILVIEW: -+ case HLSL_TYPE_PIXELSHADER: - case HLSL_TYPE_RENDERTARGETVIEW: -+ case HLSL_TYPE_VERTEXSHADER: - put_u32_unaligned(buffer, object_type[type->base_type]); - break; +- switch (type->base_type) +- { +- case HLSL_TYPE_DEPTHSTENCILVIEW: +- 
case HLSL_TYPE_RENDERTARGETVIEW: +- put_u32_unaligned(buffer, object_type[type->base_type]); +- break; - case HLSL_TYPE_TEXTURE: - put_u32_unaligned(buffer, texture_type[type->sampler_dim]); - break; - case HLSL_TYPE_UAV: - put_u32_unaligned(buffer, uav_type[type->sampler_dim]); - break; - default: +- default: - FIXME("Object type %u is not supported.\n", type->base_type); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -+ hlsl_fixme(ctx, &ctx->location, "Object type %u is not supported.", type->base_type); - return 0; - } +- return 0; +- } ++ put_u32_unaligned(buffer, uav_type[type->sampler_dim]); ++ } ++ else if (type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) ++ { ++ put_u32_unaligned(buffer, 20); ++ } ++ else if (type->class == HLSL_CLASS_RENDER_TARGET_VIEW) ++ { ++ put_u32_unaligned(buffer, 19); ++ } ++ else if (type->class == HLSL_CLASS_PIXEL_SHADER) ++ { ++ put_u32_unaligned(buffer, 5); ++ } ++ else if (type->class == HLSL_CLASS_VERTEX_SHADER) ++ { ++ put_u32_unaligned(buffer, 6); } -@@ -565,11 +627,71 @@ static uint32_t write_fx_2_string(const char *string, struct fx_write_context *f +- else /* Numeric type */ ++ else if (hlsl_is_numeric_type(type)) + { + numeric_desc = get_fx_4_numeric_type_description(type, fx); + put_u32_unaligned(buffer, numeric_desc); + } ++ else ++ { ++ FIXME("Type %u is not supported.\n", type->class); ++ set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); ++ return 0; ++ } + + return offset; + } +@@ -556,7 +647,7 @@ static void write_groups(struct fx_write_context *fx) + { + const struct hlsl_type *type = var->data_type; + +- if (type->base_type == HLSL_TYPE_EFFECT_GROUP) ++ if (type->class == HLSL_CLASS_EFFECT_GROUP) + write_group(var, fx); + } + } +@@ -565,11 +656,71 @@ static uint32_t write_fx_2_string(const char *string, struct fx_write_context *f { struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; const char *s = string ? 
string : ""; @@ -5453,7 +7827,7 @@ index bc70d5220fd..98443797543 100644 return offset; } -@@ -595,6 +717,163 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex +@@ -595,6 +746,161 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex set_u32(buffer, count_offset, count); } @@ -5554,26 +7928,24 @@ index bc70d5220fd..98443797543 100644 + } + break; + -+ case HLSL_CLASS_OBJECT: -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_PIXELSHADER: -+ case HLSL_TYPE_VERTEXSHADER: -+ hlsl_fixme(ctx, loc, "Write fx 2.0 parameter object type %#x.", type->base_type); -+ return false; -+ -+ default: -+ return false; -+ } -+ ++ case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: ++ case HLSL_CLASS_VERTEX_SHADER: + hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); + return false; + ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_UAV: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_VOID: + return false; ++ ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_TECHNIQUE: ++ /* This cannot appear as an extern variable. */ ++ break; + } + + vkd3d_unreachable(); @@ -5617,7 +7989,7 @@ index bc70d5220fd..98443797543 100644 static const struct fx_write_context_ops fx_2_ops = { .write_string = write_fx_2_string, -@@ -604,12 +883,13 @@ static const struct fx_write_context_ops fx_2_ops = +@@ -604,12 +910,13 @@ static const struct fx_write_context_ops fx_2_ops = static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) { @@ -5632,7 +8004,7 @@ index bc70d5220fd..98443797543 100644 structured = &fx.structured; /* First entry is always zeroed and skipped. */ -@@ -618,12 +898,14 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -618,12 +925,14 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, 0xfeff0901); /* Version. 
*/ offset = put_u32(&buffer, 0); @@ -5650,7 +8022,7 @@ index bc70d5220fd..98443797543 100644 write_techniques(ctx->globals, &fx); set_u32(structured, technique_count, fx.technique_count); -@@ -643,24 +925,27 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -643,35 +952,39 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) vkd3d_free(fx.unstructured.data); vkd3d_free(fx.structured.data); @@ -5682,8 +8054,14 @@ index bc70d5220fd..98443797543 100644 + .are_child_effects_supported = true, }; - static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) -@@ -672,6 +957,7 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write +-static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) ++static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, struct fx_write_context *fx) + { + struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ uint32_t name_offset, type_offset, value_offset; + uint32_t semantic_offset, flags = 0; +- uint32_t name_offset, type_offset; + enum fx_4_numeric_variable_flags { HAS_EXPLICIT_BIND_POINT = 0x4, }; @@ -5691,13 +8069,342 @@ index bc70d5220fd..98443797543 100644 /* Explicit bind point. 
*/ if (var->reg_reservation.reg_type) -@@ -690,14 +976,18 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write +@@ -686,18 +999,345 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write + + semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ + put_u32(buffer, var->buffer_offset); /* Offset in the constant buffer */ +- put_u32(buffer, 0); /* FIXME: default value offset */ ++ value_offset = put_u32(buffer, 0); /* Default value offset */ put_u32(buffer, flags); /* Flags */ - put_u32(buffer, 0); /* Annotations count */ +- put_u32(buffer, 0); /* Annotations count */ - /* FIXME: write annotations */ -+ if (has_annotations(var)) -+ hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); ++ if (shared) ++ { ++ fx->shared_numeric_variable_count++; ++ } ++ else ++ { ++ /* FIXME: write default value */ ++ set_u32(buffer, value_offset, 0); ++ ++ put_u32(buffer, 0); /* Annotations count */ ++ if (has_annotations(var)) ++ hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); ++ ++ fx->numeric_variable_count++; ++ } ++} ++ ++struct rhs_named_value ++{ ++ const char *name; ++ unsigned int value; ++}; ++ ++static bool get_fx_4_state_enum_value(const struct rhs_named_value *pairs, ++ const char *name, unsigned int *value) ++{ ++ while (pairs->name) ++ { ++ if (!ascii_strcasecmp(pairs->name, name)) ++ { ++ *value = pairs->value; ++ return true; ++ } ++ ++ pairs++; ++ } ++ ++ return false; ++} ++ ++static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, struct fx_write_context *fx) ++{ ++ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; ++ struct hlsl_type *data_type = value->node.data_type; ++ struct hlsl_ctx *ctx = fx->ctx; ++ uint32_t i, type, offset; ++ unsigned int count = hlsl_type_component_count(data_type); ++ ++ offset = put_u32_unaligned(buffer, count); ++ ++ for (i = 0; i < count; 
++i) ++ { ++ if (hlsl_is_numeric_type(data_type)) ++ { ++ switch (data_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ type = fx_4_numeric_base_type[data_type->e.numeric.type]; ++ break; ++ default: ++ type = 0; ++ hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value type %u.", data_type->e.numeric.type); ++ } ++ } ++ ++ put_u32_unaligned(buffer, type); ++ put_u32_unaligned(buffer, value->value.u[i].u); ++ } ++ ++ return offset; ++} ++ ++static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, ++ struct fx_write_context *fx) ++{ ++ uint32_t value_offset = 0, assignment_type = 0, rhs_offset; ++ uint32_t type_offset; ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ struct hlsl_ctx *ctx = fx->ctx; ++ struct hlsl_ir_node *value = entry->args->node; ++ ++ if (entry->lhs_has_index) ++ hlsl_fixme(ctx, &var->loc, "Unsupported assignment to array element."); ++ ++ put_u32(buffer, entry->name_id); ++ put_u32(buffer, 0); /* TODO: destination index */ ++ type_offset = put_u32(buffer, 0); ++ rhs_offset = put_u32(buffer, 0); ++ ++ switch (value->type) ++ { ++ case HLSL_IR_CONSTANT: ++ { ++ struct hlsl_ir_constant *c = hlsl_ir_constant(value); ++ ++ value_offset = write_fx_4_state_numeric_value(c, fx); ++ assignment_type = 1; ++ break; ++ } ++ default: ++ hlsl_fixme(ctx, &var->loc, "Unsupported assignment type for state %s.", entry->name); ++ } ++ ++ set_u32(buffer, type_offset, assignment_type); ++ set_u32(buffer, rhs_offset, value_offset); ++} ++ ++static bool state_block_contains_state(const char *name, unsigned int start, struct hlsl_state_block *block) ++{ ++ unsigned int i; ++ ++ for (i = start; i < block->count; ++i) ++ { ++ if (!ascii_strcasecmp(block->entries[i]->name, name)) ++ return true; ++ } ++ ++ return false; ++} ++ ++struct replace_state_context ++{ ++ const struct rhs_named_value *values; ++ struct hlsl_ir_var 
*var; ++}; ++ ++static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct replace_state_context *replace_context = context; ++ struct hlsl_ir_stateblock_constant *state_constant; ++ struct hlsl_ir_node *c; ++ unsigned int value; ++ ++ if (!replace_context->values) ++ return false; ++ if (instr->type != HLSL_IR_STATEBLOCK_CONSTANT) ++ return false; ++ ++ state_constant = hlsl_ir_stateblock_constant(instr); ++ if (!get_fx_4_state_enum_value(replace_context->values, state_constant->name, &value)) ++ { ++ hlsl_error(ctx, &replace_context->var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Unrecognized state constant %s.", state_constant->name); ++ return false; ++ } ++ ++ if (!(c = hlsl_new_uint_constant(ctx, value, &replace_context->var->loc))) ++ return false; ++ ++ list_add_before(&state_constant->node.entry, &c->entry); ++ hlsl_replace_node(&state_constant->node, c); ++ ++ return true; ++} ++ ++static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, ++ struct fx_write_context *fx) ++{ ++ static const struct rhs_named_value filter_values[] = ++ { ++ { "MIN_MAG_MIP_POINT", 0x00 }, ++ { "MIN_MAG_POINT_MIP_LINEAR", 0x01 }, ++ { "MIN_POINT_MAG_LINEAR_MIP_POINT", 0x04 }, ++ { "MIN_POINT_MAG_MIP_LINEAR", 0x05 }, ++ { "MIN_LINEAR_MAG_MIP_POINT", 0x10 }, ++ { "MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x11 }, ++ { "MIN_MAG_LINEAR_MIP_POINT", 0x14 }, ++ { "MIN_MAG_MIP_LINEAR", 0x15 }, ++ { "ANISOTROPIC", 0x55 }, ++ { "COMPARISON_MIN_MAG_MIP_POINT", 0x80 }, ++ { "COMPARISON_MIN_MAG_POINT_MIP_LINEAR", 0x81 }, ++ { "COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT", 0x84 }, ++ { "COMPARISON_MIN_POINT_MAG_MIP_LINEAR", 0x85 }, ++ { "COMPARISON_MIN_LINEAR_MAG_MIP_POINT", 0x90 }, ++ { "COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x91 }, ++ { "COMPARISON_MIN_MAG_LINEAR_MIP_POINT", 0x94 }, ++ { "COMPARISON_MIN_MAG_MIP_LINEAR", 0x95 }, ++ { "COMPARISON_ANISOTROPIC", 0xd5 }, ++ { NULL 
}, ++ }; ++ ++ static const struct rhs_named_value address_values[] = ++ { ++ { "WRAP", 1 }, ++ { "MIRROR", 2 }, ++ { "CLAMP", 3 }, ++ { "BORDER", 4 }, ++ { "MIRROR_ONCE", 5 }, ++ { NULL }, ++ }; ++ ++ static const struct rhs_named_value compare_func_values[] = ++ { ++ { "NEVER", 1 }, ++ { "LESS", 2 }, ++ { "EQUAL", 3 }, ++ { "LESS_EQUAL", 4 }, ++ { "GREATER", 5 }, ++ { "NOT_EQUAL", 6 }, ++ { "GREATER_EQUAL", 7 }, ++ { "ALWAYS", 8 }, ++ { NULL } ++ }; ++ ++ static const struct state ++ { ++ const char *name; ++ enum hlsl_type_class container; ++ enum hlsl_base_type type; ++ unsigned int dimx; ++ uint32_t id; ++ const struct rhs_named_value *values; ++ } ++ states[] = ++ { ++ { "Filter", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 45, filter_values }, ++ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 46, address_values }, ++ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 47, address_values }, ++ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 48, address_values }, ++ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 49 }, ++ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 50 }, ++ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 51, compare_func_values }, ++ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 4, 52 }, ++ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 53 }, ++ { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 54 }, ++ /* TODO: "Texture" field */ ++ }; ++ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); ++ struct replace_state_context replace_context; ++ struct hlsl_ir_node *node, *cast; ++ const struct state *state = NULL; ++ struct hlsl_ctx *ctx = fx->ctx; ++ struct hlsl_type *state_type; ++ unsigned int i; ++ bool progress; ++ ++ for (i = 0; i < ARRAY_SIZE(states); ++i) ++ { ++ if (type->class == states[i].container ++ && !ascii_strcasecmp(entry->name, states[i].name)) ++ { ++ state = &states[i]; ++ break; ++ } ++ } ++ ++ if (!state) ++ { ++ hlsl_error(ctx, &var->loc, 
VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unrecognized state name %s.", entry->name); ++ return; ++ } ++ ++ if (entry->args_count != 1) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unrecognized initializer for the state %s.", ++ entry->name); ++ return; ++ } ++ ++ entry->name_id = state->id; ++ ++ replace_context.values = state->values; ++ replace_context.var = var; ++ ++ /* Turned named constants to actual constants. */ ++ hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); ++ ++ if (state->dimx) ++ state_type = hlsl_get_vector_type(ctx, state->type, state->dimx); ++ else ++ state_type = hlsl_get_scalar_type(ctx, state->type); ++ ++ /* Cast to expected property type. */ ++ node = entry->args->node; ++ if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) ++ return; ++ list_add_after(&node->entry, &cast->entry); ++ ++ hlsl_src_remove(entry->args); ++ hlsl_src_from_node(entry->args, cast); ++ ++ do ++ { ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); ++ progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); ++ } while (progress); ++} ++ ++static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) ++{ ++ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i, j; ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ uint32_t count_offset, count; ++ ++ for (i = 0; i < elements_count; ++i) ++ { ++ struct hlsl_state_block *block; ++ ++ count_offset = put_u32(buffer, 0); ++ ++ count = 0; ++ if (var->state_blocks) ++ { ++ block = var->state_blocks[i]; ++ ++ for (j = 0; j < block->count; ++j) ++ { ++ struct hlsl_state_block_entry *entry = block->entries[j]; ++ ++ /* Skip if property is reassigned later. This will use the last assignment. */ ++ if (state_block_contains_state(entry->name, j + 1, block)) ++ continue; ++ ++ /* Resolve special constant names and property names. 
*/ ++ resolve_fx_4_state_block_values(var, entry, fx); ++ ++ write_fx_4_state_assignment(var, entry, fx); ++ ++count; ++ } ++ } ++ ++ set_u32(buffer, count_offset, count); ++ } } static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) @@ -5712,7 +8419,7 @@ index bc70d5220fd..98443797543 100644 if (var->reg_reservation.reg_type) bind_point = var->reg_reservation.reg_index; -@@ -712,8 +1002,47 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ +@@ -712,8 +1352,52 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ put_u32(buffer, bind_point); /* Explicit bind point */ @@ -5725,31 +8432,36 @@ index bc70d5220fd..98443797543 100644 + /* Initializer */ + switch (type->class) + { ++ case HLSL_CLASS_RENDER_TARGET_VIEW: ++ fx->rtv_count += elements_count; ++ break; + case HLSL_CLASS_TEXTURE: ++ fx->texture_count += elements_count; ++ break; + case HLSL_CLASS_UAV: ++ fx->uav_count += elements_count; + break; + -+ case HLSL_CLASS_OBJECT: -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_RENDERTARGETVIEW: -+ break; -+ case HLSL_TYPE_PIXELSHADER: -+ case HLSL_TYPE_VERTEXSHADER: -+ /* FIXME: write shader blobs, once parser support works. */ -+ for (i = 0; i < elements_count; ++i) -+ put_u32(buffer, 0); -+ ++fx->shader_variable_count; -+ break; -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", -+ type->base_type); -+ } ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_VERTEX_SHADER: ++ /* FIXME: write shader blobs, once parser support works. 
*/ ++ for (i = 0; i < elements_count; ++i) ++ put_u32(buffer, 0); ++ fx->shader_count += elements_count; ++ break; ++ ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ fx->dsv_count += elements_count; ++ break; ++ ++ case HLSL_CLASS_SAMPLER: ++ write_fx_4_state_object_initializer(var, fx); ++ fx->sampler_state_count += elements_count; + break; + + default: + hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", -+ type->base_type); ++ type->e.numeric.type); + } + put_u32(buffer, 0); /* Annotations count */ @@ -5761,7 +8473,15 @@ index bc70d5220fd..98443797543 100644 } static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx) -@@ -734,7 +1063,8 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx +@@ -729,12 +1413,16 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx + struct hlsl_ctx *ctx = fx->ctx; + struct hlsl_ir_var *var; + uint32_t count_offset; ++ bool shared; ++ ++ shared = fx->child_effect && b->modifiers & HLSL_STORAGE_SHARED; + + if (b->reservation.reg_type) bind_point = b->reservation.reg_index; if (b->type == HLSL_BUFFER_TEXTURE) flags |= IS_TBUFFER; @@ -5771,18 +8491,44 @@ index bc70d5220fd..98443797543 100644 name_offset = write_string(b->name, fx); -@@ -745,7 +1075,8 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx +@@ -744,8 +1432,17 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx + count_offset = put_u32(buffer, 0); put_u32(buffer, bind_point); /* Bind point */ - put_u32(buffer, 0); /* Annotations count */ +- put_u32(buffer, 0); /* Annotations count */ - /* FIXME: write annotations */ -+ if (b->annotations) -+ hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); ++ if (shared) ++ { ++ ++fx->shared_buffer_count; ++ } ++ else ++ { ++ put_u32(buffer, 0); /* Annotations count */ ++ if (b->annotations) ++ hlsl_fixme(ctx, &b->loc, "Writing 
annotations for buffers is not implemented."); ++ ++fx->buffer_count; ++ } count = 0; size = 0; -@@ -768,16 +1099,12 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx - static void write_buffers(struct fx_write_context *fx) +@@ -754,73 +1451,76 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx + if (var->buffer != b) + continue; + +- write_fx_4_numeric_variable(var, fx); ++ write_fx_4_numeric_variable(var, shared, fx); + size += get_fx_4_type_size(var->data_type); + ++count; + } + + set_u32(buffer, count_offset, count); + set_u32(buffer, size_offset, align(size, 16)); +- +- fx->numeric_variable_count += count; + } + +-static void write_buffers(struct fx_write_context *fx) ++static void write_buffers(struct fx_write_context *fx, bool shared) { struct hlsl_buffer *buffer; - struct hlsl_block block; @@ -5798,10 +8544,17 @@ index bc70d5220fd..98443797543 100644 + if (!buffer->size && !fx->include_empty_buffers) + continue; + if (!strcmp(buffer->name, "$Params")) ++ continue; ++ if (fx->child_effect && (shared != !!(buffer->modifiers & HLSL_STORAGE_SHARED))) continue; write_fx_4_buffer(buffer, fx); -@@ -789,38 +1116,46 @@ static bool is_object_variable(const struct hlsl_ir_var *var) +- ++fx->buffer_count; + } + } + +-static bool is_object_variable(const struct hlsl_ir_var *var) ++static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) { const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); @@ -5817,21 +8570,20 @@ index bc70d5220fd..98443797543 100644 - case HLSL_TYPE_PIXELSHADER: - case HLSL_TYPE_VERTEXSHADER: - case HLSL_TYPE_RENDERTARGETVIEW: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: ++ return true; + case HLSL_CLASS_UAV: ++ if (ctx->profile->major_version < 5) ++ return false; ++ if 
(type->e.resource.rasteriser_ordered) ++ return false; ++ return true; ++ case HLSL_CLASS_VERTEX_SHADER: return true; -+ -+ case HLSL_CLASS_OBJECT: -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_PIXELSHADER: -+ case HLSL_TYPE_VERTEXSHADER: -+ case HLSL_TYPE_RENDERTARGETVIEW: -+ return true; -+ default: -+ return false; -+ } + default: return false; @@ -5841,20 +8593,23 @@ index bc70d5220fd..98443797543 100644 -static void write_objects(struct fx_write_context *fx) +static void write_objects(struct fx_write_context *fx, bool shared) { ++ struct hlsl_ctx *ctx = fx->ctx; struct hlsl_ir_var *var; - uint32_t count = 0; -+ + +- LIST_FOR_EACH_ENTRY(var, &fx->ctx->extern_vars, struct hlsl_ir_var, extern_entry) + if (shared && !fx->child_effect) + return; - - LIST_FOR_EACH_ENTRY(var, &fx->ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!is_object_variable(var)) - continue; - -+ if (fx->child_effect && (shared != !!(var->storage_modifiers & HLSL_STORAGE_SHARED))) +- if (!is_object_variable(var)) ++ if (!is_supported_object_variable(ctx, var)) + continue; + ++ if (fx->child_effect && (shared != !!(var->storage_modifiers & HLSL_STORAGE_SHARED))) + continue; + write_fx_4_object_variable(var, fx); - ++count; } @@ -5863,37 +8618,51 @@ index bc70d5220fd..98443797543 100644 } static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) -@@ -834,9 +1169,9 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -833,10 +1533,10 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&fx.unstructured, 0); /* Empty string placeholder. 
*/ - write_buffers(&fx); +- write_buffers(&fx); - write_objects(&fx); -+ write_objects(&fx, false); - /* TODO: shared buffers */ +- /* TODO: shared buffers */ - /* TODO: shared objects */ ++ write_buffers(&fx, false); ++ write_objects(&fx, false); ++ write_buffers(&fx, true); + write_objects(&fx, true); write_techniques(ctx->globals, &fx); -@@ -846,7 +1181,7 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -844,20 +1544,20 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&buffer, fx.buffer_count); /* Buffer count. */ + put_u32(&buffer, fx.numeric_variable_count); /* Numeric variable count. */ put_u32(&buffer, fx.object_variable_count); /* Object variable count. */ - put_u32(&buffer, 0); /* Pool buffer count. */ - put_u32(&buffer, 0); /* Pool variable count. */ +- put_u32(&buffer, 0); /* Pool buffer count. */ +- put_u32(&buffer, 0); /* Pool variable count. */ - put_u32(&buffer, 0); /* Pool object count. */ -+ put_u32(&buffer, fx.shared_object_count); /* Shared object count. */ ++ put_u32(&buffer, fx.shared_buffer_count); ++ put_u32(&buffer, fx.shared_numeric_variable_count); ++ put_u32(&buffer, fx.shared_object_count); put_u32(&buffer, fx.technique_count); size_offset = put_u32(&buffer, 0); /* Unstructured size. */ put_u32(&buffer, 0); /* String count. */ -@@ -857,7 +1192,7 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, 0); /* Sampler state count. */ - put_u32(&buffer, 0); /* Rendertarget view count. */ - put_u32(&buffer, 0); /* Depth stencil view count. */ +- put_u32(&buffer, 0); /* Texture object count. */ ++ put_u32(&buffer, fx.texture_count); + put_u32(&buffer, 0); /* Depth stencil state count. */ + put_u32(&buffer, 0); /* Blend state count. */ + put_u32(&buffer, 0); /* Rasterizer state count. */ +- put_u32(&buffer, 0); /* Sampler state count. */ +- put_u32(&buffer, 0); /* Rendertarget view count. 
*/ +- put_u32(&buffer, 0); /* Depth stencil view count. */ - put_u32(&buffer, 0); /* Shader count. */ -+ put_u32(&buffer, fx.shader_variable_count); /* Shader count. */ ++ put_u32(&buffer, fx.sampler_state_count); ++ put_u32(&buffer, fx.rtv_count); ++ put_u32(&buffer, fx.dsv_count); ++ put_u32(&buffer, fx.shader_count); put_u32(&buffer, 0); /* Inline shader count. */ set_u32(&buffer, size_offset, fx.unstructured.size); -@@ -870,15 +1205,15 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -870,15 +1570,15 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) set_status(&fx, buffer.status); @@ -5913,25 +8682,51 @@ index bc70d5220fd..98443797543 100644 return fx_write_context_cleanup(&fx); } -@@ -893,7 +1228,7 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -892,8 +1592,8 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&fx.unstructured, 0); /* Empty string placeholder. */ - write_buffers(&fx); +- write_buffers(&fx); - write_objects(&fx); ++ write_buffers(&fx, false); + write_objects(&fx, false); /* TODO: interface variables */ write_groups(&fx); -@@ -915,7 +1250,7 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, 0); /* Sampler state count. */ - put_u32(&buffer, 0); /* Rendertarget view count. */ - put_u32(&buffer, 0); /* Depth stencil view count. */ +@@ -902,23 +1602,23 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&buffer, fx.buffer_count); /* Buffer count. */ + put_u32(&buffer, fx.numeric_variable_count); /* Numeric variable count. */ + put_u32(&buffer, fx.object_variable_count); /* Object variable count. */ +- put_u32(&buffer, 0); /* Pool buffer count. */ +- put_u32(&buffer, 0); /* Pool variable count. */ +- put_u32(&buffer, 0); /* Pool object count. 
*/ ++ put_u32(&buffer, fx.shared_buffer_count); ++ put_u32(&buffer, fx.shared_numeric_variable_count); ++ put_u32(&buffer, fx.shared_object_count); + put_u32(&buffer, fx.technique_count); + size_offset = put_u32(&buffer, 0); /* Unstructured size. */ + put_u32(&buffer, 0); /* String count. */ +- put_u32(&buffer, 0); /* Texture object count. */ ++ put_u32(&buffer, fx.texture_count); + put_u32(&buffer, 0); /* Depth stencil state count. */ + put_u32(&buffer, 0); /* Blend state count. */ + put_u32(&buffer, 0); /* Rasterizer state count. */ +- put_u32(&buffer, 0); /* Sampler state count. */ +- put_u32(&buffer, 0); /* Rendertarget view count. */ +- put_u32(&buffer, 0); /* Depth stencil view count. */ - put_u32(&buffer, 0); /* Shader count. */ -+ put_u32(&buffer, fx.shader_variable_count); /* Shader count. */ ++ put_u32(&buffer, fx.sampler_state_count); ++ put_u32(&buffer, fx.rtv_count); ++ put_u32(&buffer, fx.dsv_count); ++ put_u32(&buffer, fx.shader_count); put_u32(&buffer, 0); /* Inline shader count. */ put_u32(&buffer, fx.group_count); /* Group count. */ - put_u32(&buffer, 0); /* UAV count. */ -@@ -933,15 +1268,15 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +- put_u32(&buffer, 0); /* UAV count. */ ++ put_u32(&buffer, fx.uav_count); + put_u32(&buffer, 0); /* Interface variables count. */ + put_u32(&buffer, 0); /* Interface variable element count. */ + put_u32(&buffer, 0); /* Class instance elements count. 
*/ +@@ -933,15 +1633,15 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) set_status(&fx, buffer.status); @@ -5952,7 +8747,7 @@ index bc70d5220fd..98443797543 100644 } diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index bdd03c1e72a..3e8dd2c486b 100644 +index bdd03c1e72a..3e482a5fc70 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -20,29 +20,14 @@ @@ -6020,7 +8815,7 @@ index bdd03c1e72a..3e8dd2c486b 100644 vkd3d_string_buffer_printf(&generator->buffer, "return;\n"); } } -@@ -73,6 +72,8 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, +@@ -73,48 +72,57 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, const struct vkd3d_shader_instruction *instruction) { @@ -6029,7 +8824,11 @@ index bdd03c1e72a..3e8dd2c486b 100644 switch (instruction->handler_idx) { case VKD3DSIH_DCL_INPUT: -@@ -83,38 +84,44 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator + case VKD3DSIH_DCL_OUTPUT: + case VKD3DSIH_DCL_OUTPUT_SIV: ++ case VKD3DSIH_NOP: + break; + case VKD3DSIH_RET: shader_glsl_ret(generator, instruction); break; default: @@ -6091,7 +8890,7 @@ index bdd03c1e72a..3e8dd2c486b 100644 out->code = code; } else return VKD3D_ERROR_OUT_OF_MEMORY; -@@ -122,8 +129,33 @@ int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, +@@ -122,8 +130,33 @@ int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, return VKD3D_OK; } @@ -6129,17 +8928,21 @@ index bdd03c1e72a..3e8dd2c486b 100644 + return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 538f083df9c..5dd80ff1c3f 100644 +index 538f083df9c..99214fba6de 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -134,14 +134,39 @@ struct hlsl_ir_var 
*hlsl_get_var(struct hlsl_scope *scope, const char *name) +@@ -134,14 +134,43 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) return hlsl_get_var(scope->upper, name); } -void hlsl_free_var(struct hlsl_ir_var *decl) +static void free_state_block_entry(struct hlsl_state_block_entry *entry) +{ ++ unsigned int i; ++ + vkd3d_free(entry->name); ++ for (i = 0; i < entry->args_count; ++i) ++ hlsl_src_remove(&entry->args[i]); + vkd3d_free(entry->args); + hlsl_block_cleanup(entry->instrs); + vkd3d_free(entry->instrs); @@ -6173,7 +8976,7 @@ index 538f083df9c..5dd80ff1c3f 100644 vkd3d_free(decl); } -@@ -201,50 +226,46 @@ unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) +@@ -201,50 +230,46 @@ unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) bool hlsl_type_is_resource(const struct hlsl_type *type) { @@ -6251,7 +9054,7 @@ index 538f083df9c..5dd80ff1c3f 100644 } vkd3d_unreachable(); -@@ -330,16 +351,22 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type +@@ -330,16 +355,28 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type break; } @@ -6274,15 +9077,21 @@ index 538f083df9c..5dd80ff1c3f 100644 + type->reg_size[HLSL_REGSET_UAVS] = 1; + break; + -+ case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TECHNIQUE: ++ case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: break; - } } } -@@ -352,6 +379,25 @@ unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, +@@ -352,6 +389,25 @@ unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, return type->reg_size[regset]; } @@ -6308,19 +9117,31 @@ index 538f083df9c..5dd80ff1c3f 100644 static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, enum 
hlsl_type_class type_class, enum hlsl_base_type base_type, unsigned dimx, unsigned dimy) { -@@ -377,7 +423,26 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e +@@ -365,7 +421,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e + return NULL; + } + type->class = type_class; +- type->base_type = base_type; ++ type->e.numeric.type = base_type; + type->dimx = dimx; + type->dimy = dimy; + hlsl_type_calculate_reg_size(ctx, type); +@@ -377,7 +433,32 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e static bool type_is_single_component(const struct hlsl_type *type) { - return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_OBJECT; + switch (type->class) + { ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_OBJECT: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VERTEX_SHADER: + return true; + + case HLSL_CLASS_VECTOR: @@ -6329,6 +9150,9 @@ index 538f083df9c..5dd80ff1c3f 100644 + case HLSL_CLASS_ARRAY: + return false; + ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: + break; + } @@ -6336,23 +9160,66 @@ index 538f083df9c..5dd80ff1c3f 100644 } /* Given a type and a component index, this function moves one step through the path required to -@@ -497,10 +562,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty +@@ -400,7 +481,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, + { + case HLSL_CLASS_VECTOR: + assert(index < type->dimx); +- *type_ptr = hlsl_get_scalar_type(ctx, type->base_type); ++ *type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type); + *index_ptr = 0; + return index; + +@@ -410,7 +491,7 @@ static unsigned int 
traverse_path_from_component_index(struct hlsl_ctx *ctx, + bool row_major = hlsl_type_is_row_major(type); + + assert(index < type->dimx * type->dimy); +- *type_ptr = hlsl_get_vector_type(ctx, type->base_type, row_major ? type->dimx : type->dimy); ++ *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy); + *index_ptr = row_major ? x : y; + return row_major ? y : x; + } +@@ -496,11 +577,21 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + } break; - case HLSL_CLASS_OBJECT: +- case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VERTEX_SHADER: assert(idx == 0); break; - default: ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: vkd3d_unreachable(); } type = next_type; -@@ -727,7 +796,6 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, +@@ -674,13 +765,13 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co + switch (type->class) + { + case HLSL_CLASS_VECTOR: +- return hlsl_get_scalar_type(ctx, type->base_type); ++ return hlsl_get_scalar_type(ctx, type->e.numeric.type); + + case HLSL_CLASS_MATRIX: + if (hlsl_type_is_row_major(type)) +- return hlsl_get_vector_type(ctx, type->base_type, type->dimx); ++ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); + else +- return hlsl_get_vector_type(ctx, type->base_type, type->dimy); ++ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimy); + + case HLSL_CLASS_ARRAY: + return type->e.array.type; +@@ -727,7 +818,6 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; type->class = HLSL_CLASS_STRUCT; @@ -6360,7 
+9227,7 @@ index 538f083df9c..5dd80ff1c3f 100644 type->name = name; type->dimy = 1; type->e.record.fields = fields; -@@ -746,8 +814,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ +@@ -746,8 +836,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; @@ -6370,7 +9237,7 @@ index 538f083df9c..5dd80ff1c3f 100644 type->dimx = 4; type->dimy = 1; type->sampler_dim = dim; -@@ -765,8 +832,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim +@@ -765,8 +854,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; @@ -6380,7 +9247,7 @@ index 538f083df9c..5dd80ff1c3f 100644 type->dimx = format->dimx; type->dimy = 1; type->sampler_dim = dim; -@@ -784,7 +850,10 @@ static const char * get_case_insensitive_typename(const char *name) +@@ -784,7 +872,10 @@ static const char * get_case_insensitive_typename(const char *name) "dword", "float", "matrix", @@ -6391,18 +9258,26 @@ index 538f083df9c..5dd80ff1c3f 100644 }; unsigned int i; -@@ -866,11 +935,17 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) +@@ -865,12 +956,24 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) + case HLSL_CLASS_ARRAY: return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; - case HLSL_CLASS_OBJECT: +- case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VERTEX_SHADER: return 1; - default: - vkd3d_unreachable(); ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: + break; } @@ -6411,30 +9286,156 @@ index 538f083df9c..5dd80ff1c3f 100644 
} bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2) -@@ -882,16 +957,15 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - return false; - if (t1->base_type != t2->base_type) +@@ -880,56 +983,73 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 + + if (t1->class != t2->class) return false; +- if (t1->base_type != t2->base_type) +- return false; - if (t1->base_type == HLSL_TYPE_SAMPLER || t1->base_type == HLSL_TYPE_TEXTURE - || t1->base_type == HLSL_TYPE_UAV) -+ if (t1->class == HLSL_CLASS_SAMPLER || t1->class == HLSL_CLASS_TEXTURE || t1->class == HLSL_CLASS_UAV) - { - if (t1->sampler_dim != t2->sampler_dim) - return false; +- { +- if (t1->sampler_dim != t2->sampler_dim) +- return false; - if ((t1->base_type == HLSL_TYPE_TEXTURE || t1->base_type == HLSL_TYPE_UAV) -+ if ((t1->class == HLSL_CLASS_TEXTURE || t1->class == HLSL_CLASS_UAV) - && t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC - && !hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format)) - return false; +- && t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC +- && !hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format)) +- return false; - if (t1->base_type == HLSL_TYPE_UAV && t1->e.resource.rasteriser_ordered != t2->e.resource.rasteriser_ordered) -+ if (t1->class == HLSL_CLASS_UAV && t1->e.resource.rasteriser_ordered != t2->e.resource.rasteriser_ordered) - return false; +- return false; +- } +- if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) +- != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) +- return false; +- if (t1->dimx != t2->dimx) +- return false; +- if (t1->dimy != t2->dimy) +- return false; +- if (t1->class == HLSL_CLASS_STRUCT) +- { +- size_t i; +- +- if (t1->e.record.field_count != t2->e.record.field_count) +- return false; + +- for (i = 0; i < t1->e.record.field_count; ++i) +- { +- const struct hlsl_struct_field *field1 = &t1->e.record.fields[i]; +- const struct hlsl_struct_field *field2 
= &t2->e.record.fields[i]; ++ switch (t1->class) ++ { ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ if (t1->e.numeric.type != t2->e.numeric.type) ++ return false; ++ if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) ++ != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) ++ return false; ++ if (t1->dimx != t2->dimx) ++ return false; ++ if (t1->dimy != t2->dimy) ++ return false; ++ return true; + +- if (!hlsl_types_are_equal(field1->type, field2->type)) ++ case HLSL_CLASS_UAV: ++ if (t1->e.resource.rasteriser_ordered != t2->e.resource.rasteriser_ordered) ++ return false; ++ /* fall through */ ++ case HLSL_CLASS_TEXTURE: ++ if (t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC ++ && !hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format)) + return false; ++ /* fall through */ ++ case HLSL_CLASS_SAMPLER: ++ if (t1->sampler_dim != t2->sampler_dim) ++ return false; ++ return true; + +- if (strcmp(field1->name, field2->name)) ++ case HLSL_CLASS_STRUCT: ++ if (t1->e.record.field_count != t2->e.record.field_count) + return false; +- } +- } +- if (t1->class == HLSL_CLASS_ARRAY) +- return t1->e.array.elements_count == t2->e.array.elements_count +- && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); +- if (t1->class == HLSL_CLASS_OBJECT) +- { +- if (t1->base_type == HLSL_TYPE_TECHNIQUE && t1->e.version != t2->e.version) +- return false; ++ ++ for (size_t i = 0; i < t1->e.record.field_count; ++i) ++ { ++ const struct hlsl_struct_field *field1 = &t1->e.record.fields[i]; ++ const struct hlsl_struct_field *field2 = &t2->e.record.fields[i]; ++ ++ if (!hlsl_types_are_equal(field1->type, field2->type)) ++ return false; ++ ++ if (strcmp(field1->name, field2->name)) ++ return false; ++ } ++ return true; ++ ++ case HLSL_CLASS_ARRAY: ++ return t1->e.array.elements_count == t2->e.array.elements_count ++ && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); ++ ++ case HLSL_CLASS_TECHNIQUE: ++ return t1->e.version == t2->e.version; ++ ++ 
case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_VERTEX_SHADER: ++ case HLSL_CLASS_VOID: ++ return true; } - if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) -@@ -1008,14 +1082,16 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, + +- return true; ++ vkd3d_unreachable(); + } + + struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, +@@ -950,7 +1070,6 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, + } + } + type->class = old->class; +- type->base_type = old->base_type; + type->dimx = old->dimx; + type->dimy = old->dimy; + type->modifiers = old->modifiers | modifiers; +@@ -962,6 +1081,12 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, + + switch (old->class) + { ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ type->e.numeric.type = old->e.numeric.type; ++ break; ++ + case HLSL_CLASS_ARRAY: + if (!(type->e.array.type = hlsl_type_clone(ctx, old->e.array.type, default_majority, modifiers))) + { +@@ -1008,14 +1133,15 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, break; } +- case HLSL_CLASS_OBJECT: +- if (type->base_type == HLSL_TYPE_TECHNIQUE) +- type->e.version = old->e.version; +- if (old->base_type == HLSL_TYPE_TEXTURE || old->base_type == HLSL_TYPE_UAV) +- { +- type->e.resource.format = old->e.resource.format; +- type->e.resource.rasteriser_ordered = old->e.resource.rasteriser_ordered; +- } + case HLSL_CLASS_UAV: + type->e.resource.rasteriser_ordered = old->e.resource.rasteriser_ordered; + /* fall through */ @@ -6442,18 +9443,12 @@ index 538f083df9c..5dd80ff1c3f 100644 + type->e.resource.format = old->e.resource.format; + break; + - case HLSL_CLASS_OBJECT: - if (type->base_type == HLSL_TYPE_TECHNIQUE) - 
type->e.version = old->e.version; -- if (old->base_type == HLSL_TYPE_TEXTURE || old->base_type == HLSL_TYPE_UAV) -- { -- type->e.resource.format = old->e.resource.format; -- type->e.resource.rasteriser_ordered = old->e.resource.rasteriser_ordered; -- } ++ case HLSL_CLASS_TECHNIQUE: ++ type->e.version = old->e.version; break; default: -@@ -1346,6 +1422,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp +@@ -1346,6 +1472,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } @@ -6470,7 +9465,20 @@ index 538f083df9c..5dd80ff1c3f 100644 struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) { -@@ -1548,6 +1634,27 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned +@@ -1538,16 +1674,38 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned + + if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) + return NULL; ++ assert(hlsl_is_numeric_type(val->data_type)); + if (components == 1) +- type = hlsl_get_scalar_type(ctx, val->data_type->base_type); ++ type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); + else +- type = hlsl_get_vector_type(ctx, val->data_type->base_type, components); ++ type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, components); + init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); + hlsl_src_from_node(&swizzle->val, val); + swizzle->swizzle = s; return &swizzle->node; } @@ -6498,7 +9506,7 @@ index 538f083df9c..5dd80ff1c3f 100644 bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) { struct hlsl_type *type = index->val.node->data_type; -@@ -1557,7 +1664,9 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) +@@ -1557,7 +1715,9 @@ bool hlsl_index_is_noncontiguous(struct 
hlsl_ir_index *index) bool hlsl_index_is_resource_access(struct hlsl_ir_index *index) { @@ -6509,7 +9517,7 @@ index 538f083df9c..5dd80ff1c3f 100644 } bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index) -@@ -1578,7 +1687,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v +@@ -1578,10 +1738,10 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v if (!(index = hlsl_alloc(ctx, sizeof(*index)))) return NULL; @@ -6517,8 +9525,12 @@ index 538f083df9c..5dd80ff1c3f 100644 + if (type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV) type = type->e.resource.format; else if (type->class == HLSL_CLASS_MATRIX) - type = hlsl_get_vector_type(ctx, type->base_type, type->dimx); -@@ -1868,6 +1977,12 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr +- type = hlsl_get_vector_type(ctx, type->base_type, type->dimx); ++ type = hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); + else + type = hlsl_get_element_type_from_path_index(ctx, type, idx); + +@@ -1868,6 +2028,12 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr return dst; } @@ -6531,7 +9543,7 @@ index 538f083df9c..5dd80ff1c3f 100644 void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c) { hlsl_block_cleanup(&c->body); -@@ -1963,6 +2078,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, +@@ -1963,6 +2129,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_SWIZZLE: return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); @@ -6541,7 +9553,7 @@ index 538f083df9c..5dd80ff1c3f 100644 } vkd3d_unreachable(); -@@ -2018,7 +2136,8 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, +@@ -2018,7 +2187,8 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, } struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, @@ -6551,7 +9563,7 
@@ index 538f083df9c..5dd80ff1c3f 100644 { struct hlsl_buffer *buffer; -@@ -2026,8 +2145,10 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type +@@ -2026,8 +2196,10 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type return NULL; buffer->type = type; buffer->name = name; @@ -6562,7 +9574,7 @@ index 538f083df9c..5dd80ff1c3f 100644 buffer->loc = *loc; list_add_tail(&ctx->buffers, &buffer->entry); return buffer; -@@ -2130,6 +2251,19 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -2130,6 +2302,19 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru [HLSL_TYPE_BOOL] = "bool", }; @@ -6582,7 +9594,32 @@ index 538f083df9c..5dd80ff1c3f 100644 if (!(string = hlsl_get_string_buffer(ctx))) return NULL; -@@ -2183,71 +2317,53 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -2142,18 +2327,18 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + switch (type->class) + { + case HLSL_CLASS_SCALAR: +- assert(type->base_type < ARRAY_SIZE(base_types)); +- vkd3d_string_buffer_printf(string, "%s", base_types[type->base_type]); ++ assert(type->e.numeric.type < ARRAY_SIZE(base_types)); ++ vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]); + return string; + + case HLSL_CLASS_VECTOR: +- assert(type->base_type < ARRAY_SIZE(base_types)); +- vkd3d_string_buffer_printf(string, "%s%u", base_types[type->base_type], type->dimx); ++ assert(type->e.numeric.type < ARRAY_SIZE(base_types)); ++ vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); + return string; + + case HLSL_CLASS_MATRIX: +- assert(type->base_type < ARRAY_SIZE(base_types)); +- vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->base_type], type->dimy, type->dimx); ++ assert(type->e.numeric.type < ARRAY_SIZE(base_types)); ++ vkd3d_string_buffer_printf(string, 
"%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); + return string; + + case HLSL_CLASS_ARRAY: +@@ -2183,71 +2368,60 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru vkd3d_string_buffer_printf(string, ""); return string; @@ -6649,7 +9686,8 @@ index 538f083df9c..5dd80ff1c3f 100644 - default: - vkd3d_string_buffer_printf(string, ""); - return string; -+ assert(type->e.resource.format->base_type < ARRAY_SIZE(base_types)); ++ assert(hlsl_is_numeric_type(type->e.resource.format)); ++ assert(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); + if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) + { + vkd3d_string_buffer_printf(string, "Buffer"); @@ -6683,9 +9721,15 @@ index 538f083df9c..5dd80ff1c3f 100644 + } return string; + -+ case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TECHNIQUE: ++ case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: + break; } @@ -6695,7 +9739,16 @@ index 538f083df9c..5dd80ff1c3f 100644 } struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, -@@ -2611,10 +2727,10 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) +@@ -2525,7 +2699,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl + { + const union hlsl_constant_value_component *value = &constant->value.u[x]; + +- switch (type->base_type) ++ switch (type->e.numeric.type) + { + case HLSL_TYPE_BOOL: + vkd3d_string_buffer_printf(buffer, "%s ", value->u ? 
"true" : "false"); +@@ -2611,10 +2785,10 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP2_MUL] = "*", [HLSL_OP2_NEQUAL] = "!=", [HLSL_OP2_RSHIFT] = ">>", @@ -6707,7 +9760,7 @@ index 538f083df9c..5dd80ff1c3f 100644 [HLSL_OP3_TERNARY] = "ternary", }; -@@ -2791,6 +2907,12 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ +@@ -2791,6 +2965,12 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ vkd3d_string_buffer_printf(buffer, "]"); } @@ -6720,7 +9773,7 @@ index 538f083df9c..5dd80ff1c3f 100644 static void dump_ir_switch(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_switch *s) { struct hlsl_ir_switch_case *c; -@@ -2879,6 +3001,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, +@@ -2879,6 +3059,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, case HLSL_IR_SWIZZLE: dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); break; @@ -6731,7 +9784,7 @@ index 538f083df9c..5dd80ff1c3f 100644 } } -@@ -3051,6 +3177,12 @@ static void free_ir_index(struct hlsl_ir_index *index) +@@ -3051,6 +3235,12 @@ static void free_ir_index(struct hlsl_ir_index *index) vkd3d_free(index); } @@ -6744,7 +9797,7 @@ index 538f083df9c..5dd80ff1c3f 100644 void hlsl_free_instr(struct hlsl_ir_node *node) { assert(list_empty(&node->uses)); -@@ -3108,6 +3240,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) +@@ -3108,6 +3298,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) case HLSL_IR_SWITCH: free_ir_switch(hlsl_ir_switch(node)); break; @@ -6755,7 +9808,7 @@ index 538f083df9c..5dd80ff1c3f 100644 } } -@@ -3273,7 +3409,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) +@@ -3273,7 +3467,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"cs_4_0", VKD3D_SHADER_TYPE_COMPUTE, 4, 0, 0, 0, false}, {"cs_4_1", VKD3D_SHADER_TYPE_COMPUTE, 4, 1, 0, 0, false}, {"cs_5_0", 
VKD3D_SHADER_TYPE_COMPUTE, 5, 0, 0, 0, false}, @@ -6765,7 +9818,7 @@ index 538f083df9c..5dd80ff1c3f 100644 {"fx_2_0", VKD3D_SHADER_TYPE_EFFECT, 2, 0, 0, 0, false}, {"fx_4_0", VKD3D_SHADER_TYPE_EFFECT, 4, 0, 0, 0, false}, {"fx_4_1", VKD3D_SHADER_TYPE_EFFECT, 4, 1, 0, 0, false}, -@@ -3281,7 +3419,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) +@@ -3281,7 +3477,9 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"gs_4_0", VKD3D_SHADER_TYPE_GEOMETRY, 4, 0, 0, 0, false}, {"gs_4_1", VKD3D_SHADER_TYPE_GEOMETRY, 4, 1, 0, 0, false}, {"gs_5_0", VKD3D_SHADER_TYPE_GEOMETRY, 5, 0, 0, 0, false}, @@ -6775,7 +9828,7 @@ index 538f083df9c..5dd80ff1c3f 100644 {"ps.1.0", VKD3D_SHADER_TYPE_PIXEL, 1, 0, 0, 0, false}, {"ps.1.1", VKD3D_SHADER_TYPE_PIXEL, 1, 1, 0, 0, false}, {"ps.1.2", VKD3D_SHADER_TYPE_PIXEL, 1, 2, 0, 0, false}, -@@ -3309,6 +3449,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) +@@ -3309,6 +3507,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"ps_4_0_level_9_3", VKD3D_SHADER_TYPE_PIXEL, 4, 0, 9, 3, false}, {"ps_4_1", VKD3D_SHADER_TYPE_PIXEL, 4, 1, 0, 0, false}, {"ps_5_0", VKD3D_SHADER_TYPE_PIXEL, 5, 0, 0, 0, false}, @@ -6783,7 +9836,7 @@ index 538f083df9c..5dd80ff1c3f 100644 {"tx_1_0", VKD3D_SHADER_TYPE_TEXTURE, 1, 0, 0, 0, false}, {"vs.1.0", VKD3D_SHADER_TYPE_VERTEX, 1, 0, 0, 0, false}, {"vs.1.1", VKD3D_SHADER_TYPE_VERTEX, 1, 1, 0, 0, false}, -@@ -3330,6 +3471,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) +@@ -3330,6 +3529,7 @@ const struct hlsl_profile_info *hlsl_get_target_info(const char *target) {"vs_4_0_level_9_3", VKD3D_SHADER_TYPE_VERTEX, 4, 0, 9, 3, false}, {"vs_4_1", VKD3D_SHADER_TYPE_VERTEX, 4, 1, 0, 0, false}, {"vs_5_0", VKD3D_SHADER_TYPE_VERTEX, 5, 0, 0, 0, false}, @@ -6791,20 +9844,111 @@ index 538f083df9c..5dd80ff1c3f 100644 }; for (i = 0; i < ARRAY_SIZE(profiles); ++i) -@@ -3393,10 +3535,8 @@ static void 
declare_predefined_types(struct hlsl_ctx *ctx) +@@ -3351,6 +3551,7 @@ static int compare_function_rb(const void *key, const struct rb_entry *entry) + + static void declare_predefined_types(struct hlsl_ctx *ctx) + { ++ struct vkd3d_string_buffer *name; + unsigned int x, y, bt, i, v; + struct hlsl_type *type; + +@@ -3363,7 +3564,6 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + "uint", + "bool", + }; +- char name[15]; + + static const char *const variants_float[] = {"min10float", "min16float"}; + static const char *const variants_int[] = {"min12int", "min16int"}; +@@ -3391,14 +3591,6 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + {"dword", HLSL_CLASS_SCALAR, HLSL_TYPE_UINT, 1, 1}, + {"vector", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, {"matrix", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, - {"fxgroup", HLSL_CLASS_OBJECT, HLSL_TYPE_EFFECT_GROUP, 1, 1}, - {"pass", HLSL_CLASS_OBJECT, HLSL_TYPE_PASS, 1, 1}, +- {"fxgroup", HLSL_CLASS_OBJECT, HLSL_TYPE_EFFECT_GROUP, 1, 1}, +- {"pass", HLSL_CLASS_OBJECT, HLSL_TYPE_PASS, 1, 1}, - {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, - {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, - {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, - {"VERTEXSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, -+ {"pixelshader", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, -+ {"vertexshader", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, - {"RenderTargetView",HLSL_CLASS_OBJECT, HLSL_TYPE_RENDERTARGETVIEW, 1, 1}, - {"DepthStencilView",HLSL_CLASS_OBJECT, HLSL_TYPE_DEPTHSTENCILVIEW, 1, 1}, +- {"RenderTargetView",HLSL_CLASS_OBJECT, HLSL_TYPE_RENDERTARGETVIEW, 1, 1}, +- {"DepthStencilView",HLSL_CLASS_OBJECT, HLSL_TYPE_DEPTHSTENCILVIEW, 1, 1}, }; -@@ -3504,12 +3644,14 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + + static const struct +@@ -3413,28 +3605,34 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + {"technique11", 11}, + }; + ++ if 
(!(name = hlsl_get_string_buffer(ctx))) ++ return; ++ + for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) + { + for (y = 1; y <= 4; ++y) + { + for (x = 1; x <= 4; ++x) + { +- sprintf(name, "%s%ux%u", names[bt], y, x); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_MATRIX, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s%ux%u", names[bt], y, x); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_MATRIX, bt, x, y); + hlsl_scope_add_type(ctx->globals, type); + ctx->builtin_types.matrix[bt][x - 1][y - 1] = type; + + if (y == 1) + { +- sprintf(name, "%s%u", names[bt], x); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_VECTOR, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s%u", names[bt], x); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_VECTOR, bt, x, y); + hlsl_scope_add_type(ctx->globals, type); + ctx->builtin_types.vector[bt][x - 1] = type; + + if (x == 1) + { +- sprintf(name, "%s", names[bt]); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_SCALAR, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s", names[bt]); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_SCALAR, bt, x, y); + hlsl_scope_add_type(ctx->globals, type); + ctx->builtin_types.scalar[bt] = type; + } +@@ -3477,22 +3675,25 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + { + for (x = 1; x <= 4; ++x) + { +- sprintf(name, "%s%ux%u", variants[v], y, x); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_MATRIX, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s%ux%u", variants[v], y, x); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_MATRIX, bt, x, y); + type->is_minimum_precision = 1; + hlsl_scope_add_type(ctx->globals, type); + + if (y == 1) + { +- sprintf(name, "%s%u", variants[v], x); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_VECTOR, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s%u", 
variants[v], x); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_VECTOR, bt, x, y); + type->is_minimum_precision = 1; + hlsl_scope_add_type(ctx->globals, type); + + if (x == 1) + { +- sprintf(name, "%s", variants[v]); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_SCALAR, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s", variants[v]); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_SCALAR, bt, x, y); + type->is_minimum_precision = 1; + hlsl_scope_add_type(ctx->globals, type); + } +@@ -3504,12 +3705,20 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) for (bt = 0; bt <= HLSL_SAMPLER_DIM_LAST_SAMPLER; ++bt) { @@ -6816,12 +9960,32 @@ index 538f083df9c..5dd80ff1c3f 100644 - ctx->builtin_types.Void = hlsl_new_type(ctx, "void", HLSL_CLASS_OBJECT, HLSL_TYPE_VOID, 1, 1); + ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "vertexshader", HLSL_CLASS_VERTEX_SHADER)); for (i = 0; i < ARRAY_SIZE(effect_types); ++i) { -@@ -3571,27 +3713,46 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil +@@ -3520,10 +3729,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + + for (i = 0; i < 
ARRAY_SIZE(technique_types); ++i) + { +- type = hlsl_new_type(ctx, technique_types[i].name, HLSL_CLASS_OBJECT, HLSL_TYPE_TECHNIQUE, 1, 1); ++ type = hlsl_new_simple_type(ctx, technique_types[i].name, HLSL_CLASS_TECHNIQUE); + type->e.version = technique_types[i].version; + hlsl_scope_add_type(ctx->globals, type); + } ++ ++ hlsl_release_string_buffer(ctx, name); + } + + static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, +@@ -3571,27 +3782,46 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil list_init(&ctx->buffers); if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, @@ -6879,7 +10043,7 @@ index 538f083df9c..5dd80ff1c3f 100644 } } -@@ -3615,6 +3776,21 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) +@@ -3615,6 +3845,21 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) rb_destroy(&ctx->functions, free_function_rb, NULL); @@ -6901,7 +10065,7 @@ index 538f083df9c..5dd80ff1c3f 100644 LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) { LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) -@@ -3638,6 +3814,7 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) +@@ -3638,6 +3883,7 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { @@ -6909,7 +10073,7 @@ index 538f083df9c..5dd80ff1c3f 100644 const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; struct hlsl_ir_function_decl *decl, *entry_func = NULL; const struct hlsl_profile_info *profile; -@@ -3659,25 +3836,25 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d +@@ -3659,25 +3905,25 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d return VKD3D_ERROR_NOT_IMPLEMENTED; } 
@@ -6939,7 +10103,7 @@ index 538f083df9c..5dd80ff1c3f 100644 { vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, "The '%s' target profile is incompatible with the 'fx' target type.", profile->name); -@@ -3741,8 +3918,40 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d +@@ -3741,8 +3987,41 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d return VKD3D_ERROR_INVALID_SHADER; } @@ -6948,27 +10112,28 @@ index 538f083df9c..5dd80ff1c3f 100644 + || target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT + || target_type == VKD3D_SHADER_TARGET_D3D_ASM) + { ++ uint64_t config_flags = vkd3d_shader_init_config_flags(); + struct vkd3d_shader_compile_info info = *compile_info; -+ struct vkd3d_shader_parser *parser; ++ struct vsir_program program; + + if (profile->major_version < 4) + { + if ((ret = hlsl_emit_bytecode(&ctx, entry_func, VKD3D_SHADER_TARGET_D3D_BYTECODE, &info.source)) < 0) + goto done; + info.source_type = VKD3D_SHADER_SOURCE_D3D_BYTECODE; -+ ret = vkd3d_shader_sm1_parser_create(&info, message_context, &parser); ++ ret = d3dbc_parse(&info, config_flags, message_context, &program); + } + else + { + if ((ret = hlsl_emit_bytecode(&ctx, entry_func, VKD3D_SHADER_TARGET_DXBC_TPF, &info.source)) < 0) + goto done; + info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; -+ ret = vkd3d_shader_sm4_parser_create(&info, message_context, &parser); ++ ret = tpf_parse(&info, config_flags, message_context, &program); + } + if (ret >= 0) + { -+ ret = vkd3d_shader_parser_compile(parser, &info, out, message_context); -+ vkd3d_shader_parser_destroy(parser); ++ ret = vsir_program_compile(&program, config_flags, &info, out, message_context); ++ vsir_program_cleanup(&program); + } + vkd3d_shader_free_shader_code(&info.source); + } @@ -6982,46 +10147,62 @@ index 538f083df9c..5dd80ff1c3f 100644 return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h 
-index df0a53b20de..7a8fe4de437 100644 +index df0a53b20de..27814f3a56f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -79,6 +79,11 @@ enum hlsl_type_class +@@ -78,7 +78,18 @@ enum hlsl_type_class + HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX, HLSL_CLASS_STRUCT, HLSL_CLASS_ARRAY, - HLSL_CLASS_OBJECT, +- HLSL_CLASS_OBJECT, ++ HLSL_CLASS_DEPTH_STENCIL_VIEW, ++ HLSL_CLASS_EFFECT_GROUP, ++ HLSL_CLASS_PASS, ++ HLSL_CLASS_PIXEL_SHADER, ++ HLSL_CLASS_RENDER_TARGET_VIEW, + HLSL_CLASS_SAMPLER, + HLSL_CLASS_STRING, ++ HLSL_CLASS_TECHNIQUE, + HLSL_CLASS_TEXTURE, + HLSL_CLASS_UAV, ++ HLSL_CLASS_VERTEX_SHADER, + HLSL_CLASS_VOID, }; enum hlsl_base_type -@@ -90,9 +95,6 @@ enum hlsl_base_type +@@ -90,18 +101,6 @@ enum hlsl_base_type HLSL_TYPE_UINT, HLSL_TYPE_BOOL, HLSL_TYPE_LAST_SCALAR = HLSL_TYPE_BOOL, - HLSL_TYPE_SAMPLER, - HLSL_TYPE_TEXTURE, - HLSL_TYPE_UAV, - HLSL_TYPE_PIXELSHADER, - HLSL_TYPE_VERTEXSHADER, - HLSL_TYPE_PASS, -@@ -100,8 +102,6 @@ enum hlsl_base_type - HLSL_TYPE_DEPTHSTENCILVIEW, - HLSL_TYPE_TECHNIQUE, - HLSL_TYPE_EFFECT_GROUP, +- HLSL_TYPE_PIXELSHADER, +- HLSL_TYPE_VERTEXSHADER, +- HLSL_TYPE_PASS, +- HLSL_TYPE_RENDERTARGETVIEW, +- HLSL_TYPE_DEPTHSTENCILVIEW, +- HLSL_TYPE_TECHNIQUE, +- HLSL_TYPE_EFFECT_GROUP, - HLSL_TYPE_STRING, - HLSL_TYPE_VOID, }; enum hlsl_sampler_dim -@@ -150,10 +150,10 @@ struct hlsl_type - * Otherwise, base_type is not used. */ - enum hlsl_base_type base_type; +@@ -143,17 +142,11 @@ struct hlsl_type + struct rb_entry scope_entry; + enum hlsl_type_class class; +- /* If class is <= HLSL_CLASS_LAST_NUMERIC, then base_type is <= HLSL_TYPE_LAST_SCALAR. +- * If class is HLSL_CLASS_OBJECT, then base_type is > HLSL_TYPE_LAST_SCALAR. +- * If class is HLSL_CLASS_OBJECT and base_type is HLSL_TYPE_TECHNIQUE, additional version +- * field is used to distinguish between technique types. +- * Otherwise, base_type is not used. 
*/ +- enum hlsl_base_type base_type; +- - /* If base_type is HLSL_TYPE_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. - * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim can be any value of the enum except ++ + /* If class is HLSL_CLASS_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. + * If class is HLSL_CLASS_TEXTURE, then sampler_dim can be any value of the enum except * HLSL_SAMPLER_DIM_GENERIC and HLSL_SAMPLER_DIM_COMPARISON. @@ -7030,7 +10211,7 @@ index df0a53b20de..7a8fe4de437 100644 * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, HLSL_SAMPLER_DIM_2DARRAY, * HLSL_SAMPLER_DIM_BUFFER, or HLSL_SAMPLER_DIM_STRUCTURED_BUFFER. * Otherwise, sampler_dim is not used */ -@@ -171,11 +171,7 @@ struct hlsl_type +@@ -171,11 +164,7 @@ struct hlsl_type * If type is HLSL_CLASS_MATRIX, then dimx is the number of columns, and dimy the number of rows. * If type is HLSL_CLASS_ARRAY, then dimx and dimy have the same value as in the type of the array elements. * If type is HLSL_CLASS_STRUCT, then dimx is the sum of (dimx * dimy) of every component, and dimy = 1. @@ -7043,7 +10224,19 @@ index df0a53b20de..7a8fe4de437 100644 unsigned int dimx; unsigned int dimy; /* Sample count for HLSL_SAMPLER_DIM_2DMS or HLSL_SAMPLER_DIM_2DMSARRAY. */ -@@ -196,8 +192,8 @@ struct hlsl_type +@@ -183,6 +172,11 @@ struct hlsl_type + + union + { ++ /* Additional information if type is numeric. */ ++ struct ++ { ++ enum hlsl_base_type type; ++ } numeric; + /* Additional information if type is HLSL_CLASS_STRUCT. */ + struct + { +@@ -196,8 +190,8 @@ struct hlsl_type /* Array length, or HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT if it is not known yet at parse time. */ unsigned int elements_count; } array; @@ -7054,7 +10247,7 @@ index df0a53b20de..7a8fe4de437 100644 struct { /* Format of the data contained within the type. 
*/ -@@ -298,6 +294,7 @@ enum hlsl_ir_node_type +@@ -298,6 +292,7 @@ enum hlsl_ir_node_type HLSL_IR_STORE, HLSL_IR_SWIZZLE, HLSL_IR_SWITCH, @@ -7062,7 +10255,7 @@ index df0a53b20de..7a8fe4de437 100644 }; /* Common data for every type of IR instruction node. */ -@@ -374,6 +371,8 @@ struct hlsl_attribute +@@ -374,6 +369,8 @@ struct hlsl_attribute #define HLSL_STORAGE_CENTROID 0x00004000 #define HLSL_STORAGE_NOPERSPECTIVE 0x00008000 #define HLSL_STORAGE_LINEAR 0x00010000 @@ -7071,7 +10264,16 @@ index df0a53b20de..7a8fe4de437 100644 #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -@@ -421,6 +420,14 @@ struct hlsl_ir_var +@@ -393,7 +390,7 @@ struct hlsl_attribute + struct hlsl_reg_reservation + { + char reg_type; +- unsigned int reg_index; ++ unsigned int reg_space, reg_index; + + char offset_type; + unsigned int offset_index; +@@ -421,6 +418,14 @@ struct hlsl_ir_var /* Scope that contains annotations for this variable. */ struct hlsl_scope *annotations; @@ -7086,7 +10288,21 @@ index df0a53b20de..7a8fe4de437 100644 /* Indexes of the IR instructions where the variable is first written and last read (liveness * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 * means function entry. */ -@@ -456,6 +463,38 @@ struct hlsl_ir_var +@@ -442,9 +447,10 @@ struct hlsl_ir_var + enum hlsl_sampler_dim sampler_dim; + struct vkd3d_shader_location first_sampler_dim_loc; + } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; +- /* Minimum number of binds required to include all object components actually used in the shader. +- * It may be less than the allocation size, e.g. for texture arrays. */ +- unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1]; ++ /* Minimum number of binds required to include all components actually used in the shader. ++ * It may be less than the allocation size, e.g. for texture arrays. 
++ * The bind_count for HLSL_REGSET_NUMERIC is only used in uniforms for now. */ ++ unsigned int bind_count[HLSL_REGSET_LAST + 1]; + + /* Whether the shader performs dereferences with non-constant offsets in the variable. */ + bool indexable; +@@ -456,6 +462,40 @@ struct hlsl_ir_var uint32_t is_separated_resource : 1; }; @@ -7103,6 +10319,8 @@ index df0a53b20de..7a8fe4de437 100644 +{ + /* For assignments, the name in the lhs. */ + char *name; ++ /* Resolved format-specific property identifier. */ ++ unsigned int name_id; + + /* Whether the lhs in the assignment is indexed and, in that case, its index. */ + bool lhs_has_index; @@ -7112,7 +10330,7 @@ index df0a53b20de..7a8fe4de437 100644 + struct hlsl_block *instrs; + + /* For assignments, arguments of the rhs initializer. */ -+ struct hlsl_ir_node **args; ++ struct hlsl_src *args; + unsigned int args_count; +}; + @@ -7125,7 +10343,7 @@ index df0a53b20de..7a8fe4de437 100644 /* Sized array of variables representing a function's parameters. */ struct hlsl_func_parameters { -@@ -593,18 +632,15 @@ enum hlsl_ir_expr_op +@@ -593,18 +633,15 @@ enum hlsl_ir_expr_op HLSL_OP2_MUL, HLSL_OP2_NEQUAL, HLSL_OP2_RSHIFT, @@ -7148,7 +10366,7 @@ index df0a53b20de..7a8fe4de437 100644 HLSL_OP3_TERNARY, }; -@@ -750,6 +786,14 @@ struct hlsl_ir_constant +@@ -750,6 +787,14 @@ struct hlsl_ir_constant struct hlsl_reg reg; }; @@ -7163,7 +10381,7 @@ index df0a53b20de..7a8fe4de437 100644 struct hlsl_scope { /* Item entry for hlsl_ctx.scopes. 
*/ -@@ -798,10 +842,13 @@ struct hlsl_buffer +@@ -798,10 +843,13 @@ struct hlsl_buffer struct vkd3d_shader_location loc; enum hlsl_buffer_type type; const char *name; @@ -7177,7 +10395,7 @@ index df0a53b20de..7a8fe4de437 100644 /* Item entry for hlsl_ctx.buffers */ struct list entry; -@@ -920,8 +967,21 @@ struct hlsl_ctx +@@ -920,8 +968,21 @@ struct hlsl_ctx uint32_t found_numthreads : 1; bool semantic_compat_mapping; @@ -7199,7 +10417,7 @@ index df0a53b20de..7a8fe4de437 100644 struct hlsl_resource_load_params { struct hlsl_type *format; -@@ -1009,6 +1069,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n +@@ -1009,6 +1070,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); } @@ -7212,7 +10430,15 @@ index df0a53b20de..7a8fe4de437 100644 static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); -@@ -1201,6 +1267,7 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new); +@@ -1183,6 +1250,7 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const + + void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); + ++void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); + int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); +@@ -1201,6 +1269,7 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new); void hlsl_free_attribute(struct hlsl_attribute *attr); void hlsl_free_instr(struct hlsl_ir_node *node); void hlsl_free_instr_list(struct list *list); @@ -7220,7 +10446,7 @@ index df0a53b20de..7a8fe4de437 100644 void hlsl_free_type(struct hlsl_type *type); void hlsl_free_var(struct hlsl_ir_var *decl); -@@ 
-1222,7 +1289,8 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp +@@ -1222,7 +1291,8 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp struct hlsl_ir_node *arg2); struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, @@ -7230,7 +10456,7 @@ index df0a53b20de..7a8fe4de437 100644 struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, -@@ -1243,6 +1311,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond +@@ -1243,6 +1313,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); @@ -7239,7 +10465,7 @@ index df0a53b20de..7a8fe4de437 100644 void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); -@@ -1279,6 +1349,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, +@@ -1279,6 +1351,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count); struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); @@ -7248,7 +10474,20 @@ index df0a53b20de..7a8fe4de437 100644 struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const 
struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, -@@ -1356,6 +1428,8 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst +@@ -1330,7 +1404,6 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int + bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); + + void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx); +-void hlsl_prepend_global_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block); + + const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); + unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type); +@@ -1352,10 +1425,13 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere + + bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); + bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); ++bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); + bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), struct hlsl_block *block, void *context); @@ -7306,7 +10545,7 @@ index 558506db108..88b917eff11 100644 volatile {return KW_VOLATILE; } while {return KW_WHILE; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index cd05fd008a6..0c196b77595 100644 +index cd05fd008a6..9c1bdef926d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -77,6 +77,10 @@ struct parse_variable_def @@ -7333,7 +10572,17 @@ index cd05fd008a6..0c196b77595 100644 } %code provides -@@ -413,7 +423,7 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct +@@ -158,6 +168,9 @@ 
static struct list *make_empty_list(struct hlsl_ctx *ctx) + + static void destroy_block(struct hlsl_block *block) + { ++ if (!block) ++ return; ++ + hlsl_block_cleanup(block); + vkd3d_free(block); + } +@@ -413,7 +426,7 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct return NULL; } @@ -7342,7 +10591,7 @@ index cd05fd008a6..0c196b77595 100644 hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); -@@ -438,8 +448,9 @@ static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t +@@ -438,8 +451,9 @@ static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) { @@ -7353,7 +10602,7 @@ index cd05fd008a6..0c196b77595 100644 /* E.g. "for (i = 0; ; ++i)". */ if (list_empty(&cond_block->instrs)) -@@ -449,7 +460,12 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co +@@ -449,7 +463,12 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co check_condition_type(ctx, condition); @@ -7367,7 +10616,24 @@ index cd05fd008a6..0c196b77595 100644 return false; hlsl_block_add_instr(cond_block, not); -@@ -817,8 +833,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str +@@ -640,6 +659,16 @@ static unsigned int initializer_size(const struct parse_initializer *initializer + return count; + } + ++static void cleanup_parse_attribute_list(struct parse_attribute_list *attr_list) ++{ ++ unsigned int i = 0; ++ ++ assert(attr_list); ++ for (i = 0; i < attr_list->count; ++i) ++ hlsl_free_attribute((struct hlsl_attribute *) attr_list->attrs[i]); ++ vkd3d_free(attr_list->attrs); ++} ++ + static void free_parse_initializer(struct parse_initializer *initializer) + { + destroy_block(initializer->instrs); +@@ -817,8 +846,7 @@ static bool 
add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; struct hlsl_ir_node *return_index, *cast; @@ -7377,7 +10643,7 @@ index cd05fd008a6..0c196b77595 100644 && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); -@@ -925,24 +940,10 @@ static void free_parse_variable_def(struct parse_variable_def *v) +@@ -925,24 +953,10 @@ static void free_parse_variable_def(struct parse_variable_def *v) vkd3d_free(v->arrays.sizes); vkd3d_free(v->name); hlsl_cleanup_semantic(&v->semantic); @@ -7403,7 +10669,7 @@ index cd05fd008a6..0c196b77595 100644 static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, struct hlsl_type *type, uint32_t modifiers, struct list *defs) { -@@ -965,7 +966,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, +@@ -965,7 +979,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, field->type = type; @@ -7412,7 +10678,7 @@ index cd05fd008a6..0c196b77595 100644 { for (k = 0; k < v->arrays.count; ++k) unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -@@ -1115,7 +1116,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters +@@ -1115,7 +1129,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters } static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope *annotations, @@ -7421,7 +10687,7 @@ index cd05fd008a6..0c196b77595 100644 { struct hlsl_ir_var *var; struct hlsl_type *type; -@@ -1125,6 +1126,11 @@ static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope * +@@ -1125,6 +1139,11 @@ static bool add_pass(struct hlsl_ctx *ctx, const char *name, struct hlsl_scope * return false; var->annotations = annotations; @@ -7433,7 +10699,35 @@ index cd05fd008a6..0c196b77595 
100644 if (!hlsl_add_var(ctx, var, false)) { struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); -@@ -1210,7 +1216,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const +@@ -1191,17 +1210,18 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl + return true; + } + +-static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) ++static bool parse_reservation_index(const char *string, char *type, uint32_t *index) + { +- struct hlsl_reg_reservation reservation = {0}; ++ if (!sscanf(string + 1, "%u", index)) ++ return false; + +- if (!sscanf(reg_string + 1, "%u", &reservation.reg_index)) +- { +- FIXME("Unsupported register reservation syntax.\n"); +- return reservation; +- } +- reservation.reg_type = ascii_tolower(reg_string[0]); +- return reservation; ++ *type = ascii_tolower(string[0]); ++ return true; ++} ++ ++static bool parse_reservation_space(const char *string, uint32_t *space) ++{ ++ return !ascii_strncasecmp(string, "space", 5) && sscanf(string + 5, "%u", space); + } + + static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const char *reg_string, +@@ -1210,7 +1230,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const struct hlsl_reg_reservation reservation = {0}; char *endptr; @@ -7442,7 +10736,16 @@ index cd05fd008a6..0c196b77595 100644 return reservation; reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); -@@ -1293,6 +1299,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str +@@ -1273,7 +1293,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str + struct hlsl_ir_node *node; + struct hlsl_block expr; + unsigned int ret = 0; +- bool progress; ++ struct hlsl_src src; + + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { +@@ -1293,6 +1313,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx 
*ctx, str case HLSL_IR_RESOURCE_STORE: case HLSL_IR_STORE: case HLSL_IR_SWITCH: @@ -7450,7 +10753,116 @@ index cd05fd008a6..0c196b77595 100644 hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); } -@@ -1933,10 +1940,9 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -1309,13 +1330,12 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str + return 0; + } + +- do +- { +- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); +- progress |= hlsl_copy_propagation_execute(ctx, &expr); +- } while (progress); ++ /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ ++ hlsl_src_from_node(&src, node_from_block(&expr)); ++ hlsl_run_const_passes(ctx, &expr); ++ node = src.node; ++ hlsl_src_remove(&src); + +- node = node_from_block(&expr); + if (node->type == HLSL_IR_CONSTANT) + { + constant = hlsl_ir_constant(node); +@@ -1334,9 +1354,6 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str + + static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) + { +- if (t1->base_type > HLSL_TYPE_LAST_SCALAR || t2->base_type > HLSL_TYPE_LAST_SCALAR) +- return false; +- + /* Scalar vars can be converted to pretty much everything */ + if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) + return true; +@@ -1368,10 +1385,6 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t + + static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hlsl_base_type t2) + { +- if (t1 > HLSL_TYPE_LAST_SCALAR || t2 > HLSL_TYPE_LAST_SCALAR) { +- FIXME("Unexpected base type.\n"); +- return HLSL_TYPE_FLOAT; +- } + if (t1 == t2) + return t1 == HLSL_TYPE_BOOL ? 
HLSL_TYPE_INT : t1; + if (t1 == HLSL_TYPE_DOUBLE || t2 == HLSL_TYPE_DOUBLE) +@@ -1475,7 +1488,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl + struct hlsl_ir_node *load; + struct hlsl_ir_var *var; + +- scalar_type = hlsl_get_scalar_type(ctx, type->base_type); ++ scalar_type = hlsl_get_scalar_type(ctx, type->e.numeric.type); + + if (!(var = hlsl_new_synthetic_var(ctx, "split_op", type, loc))) + return NULL; +@@ -1525,7 +1538,7 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * + const struct hlsl_type *type = instr->data_type; + struct vkd3d_string_buffer *string; + +- switch (type->base_type) ++ switch (type->e.numeric.type) + { + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: +@@ -1575,13 +1588,13 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct + static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, + const struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); + enum hlsl_type_class type; ++ enum hlsl_base_type base; + unsigned int dimx, dimy; + + if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) + return NULL; +- ++ base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); + return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + } + +@@ -1618,14 +1631,15 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str + const struct vkd3d_shader_location *loc) + { + struct hlsl_type *common_type, *return_type; +- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); + enum hlsl_type_class type; ++ enum hlsl_base_type base; + unsigned int dimx, dimy; + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + + if (!expr_common_shape(ctx, 
arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) + return NULL; + ++ base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); + common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); + +@@ -1665,7 +1679,7 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct h + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type base = arg1->data_type->base_type; ++ enum hlsl_base_type base = arg1->data_type->e.numeric.type; + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *return_type, *integer_type; + enum hlsl_type_class type; +@@ -1695,7 +1709,7 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct h + static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); ++ enum hlsl_base_type base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *common_type, *ret_type; + enum hlsl_ir_expr_op op; +@@ -1933,10 +1947,9 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo return NULL; resource_type = hlsl_deref_get_type(ctx, &resource_deref); @@ -7463,7 +10875,16 @@ index cd05fd008a6..0c196b77595 100644 hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Read-only resources cannot be stored to."); -@@ -2085,24 +2091,23 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i +@@ -1947,7 +1960,7 @@ static struct hlsl_ir_node *add_assignment(struct 
hlsl_ctx *ctx, struct hlsl_blo + "Resource store expressions must write to all components."); + + assert(coords->data_type->class == HLSL_CLASS_VECTOR); +- assert(coords->data_type->base_type == HLSL_TYPE_UINT); ++ assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); + assert(coords->data_type->dimx == dim_count); + + if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) +@@ -2085,24 +2098,23 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i } } @@ -7496,7 +10917,7 @@ index cd05fd008a6..0c196b77595 100644 } static bool type_has_numeric_components(struct hlsl_type *type) -@@ -2140,6 +2145,18 @@ static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int mo +@@ -2140,6 +2152,18 @@ static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int mo } } @@ -7515,7 +10936,7 @@ index cd05fd008a6..0c196b77595 100644 static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) { struct hlsl_type *basic_type = v->basic_type; -@@ -2160,7 +2177,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) +@@ -2160,7 +2184,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) type = basic_type; @@ -7524,7 +10945,7 @@ index cd05fd008a6..0c196b77595 100644 { for (i = 0; i < v->arrays.count; ++i) unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -@@ -2265,12 +2282,8 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) +@@ -2265,12 +2289,8 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) if (!(modifiers & HLSL_STORAGE_STATIC)) var->storage_modifiers |= HLSL_STORAGE_UNIFORM; @@ -7539,7 +10960,7 @@ index cd05fd008a6..0c196b77595 100644 if ((func = hlsl_get_first_func_decl(ctx, var->name))) { -@@ -2306,7 +2319,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) +@@ -2306,7 +2326,7 @@ static void 
declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) } if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) @@ -7548,7 +10969,7 @@ index cd05fd008a6..0c196b77595 100644 { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Static variables cannot have both numeric and resource components."); -@@ -2349,8 +2362,25 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var +@@ -2349,8 +2369,25 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var free_parse_variable_def(v); continue; } @@ -7574,7 +10995,7 @@ index cd05fd008a6..0c196b77595 100644 if (v->initializer.args_count) { if (v->initializer.braces) -@@ -2394,7 +2424,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var +@@ -2394,7 +2431,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var /* Initialize statics to zero by default. */ @@ -7583,7 +11004,34 @@ index cd05fd008a6..0c196b77595 100644 { free_parse_variable_def(v); continue; -@@ -2650,12 +2680,14 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, +@@ -2562,7 +2599,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, + { + struct hlsl_type *type = arg->data_type; + +- if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) ++ if (type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF) + return arg; + + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); +@@ -2589,7 +2626,7 @@ static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *p + static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type base = params->args[0]->data_type->base_type; ++ enum hlsl_base_type base = 
params->args[0]->data_type->e.numeric.type; + bool vectors = false, matrices = false; + unsigned int dimx = 4, dimy = 4; + struct hlsl_type *common_type; +@@ -2599,7 +2636,7 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * + { + struct hlsl_type *arg_type = params->args[i]->data_type; + +- base = expr_common_base_type(base, arg_type->base_type); ++ base = expr_common_base_type(base, arg_type->e.numeric.type); + + if (arg_type->class == HLSL_CLASS_VECTOR) + { +@@ -2650,12 +2687,14 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -7594,12 +11042,12 @@ index cd05fd008a6..0c196b77595 100644 return false; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ base_type = type->base_type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; ++ base_type = type->e.numeric.type == HLSL_TYPE_HALF ? 
HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; + type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); return convert_args(ctx, params, type, loc); } -@@ -2715,81 +2747,62 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, +@@ -2715,81 +2754,62 @@ static bool intrinsic_acos(struct hlsl_ctx *ctx, return write_acos_or_asin(ctx, params, loc, false); } @@ -7629,9 +11077,9 @@ index cd05fd008a6..0c196b77595 100644 + if (!(res = hlsl_add_load_component(ctx, params->instrs, arg, 0, loc))) return false; - hlsl_block_add_instr(params->instrs, zero); -- -- mul = one; +- mul = one; +- - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) + for (i = 1; i < count; ++i) @@ -7672,10 +11120,10 @@ index cd05fd008a6..0c196b77595 100644 - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; - hlsl_block_add_instr(params->instrs, zero); -- + - if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) - return false; - +- - return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); - } - else if (arg->data_type->base_type == HLSL_TYPE_BOOL) @@ -7715,7 +11163,7 @@ index cd05fd008a6..0c196b77595 100644 } static bool intrinsic_asin(struct hlsl_ctx *ctx, -@@ -2857,20 +2870,20 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, +@@ -2857,20 +2877,20 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, type->name, type->name, type->name); if (ret < 0) { @@ -7739,7 +11187,7 @@ index cd05fd008a6..0c196b77595 100644 if (!func) return false; -@@ -2890,15 +2903,6 @@ static bool intrinsic_atan2(struct hlsl_ctx *ctx, +@@ -2890,15 +2910,6 @@ static bool intrinsic_atan2(struct hlsl_ctx *ctx, return write_atan_or_atan2(ctx, params, loc, true); } @@ -7755,7 +11203,25 @@ index cd05fd008a6..0c196b77595 100644 static bool intrinsic_asfloat(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -3022,6 +3026,46 @@ static bool 
intrinsic_cos(struct hlsl_ctx *ctx, +@@ -2906,7 +2917,7 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, + struct hlsl_type *data_type; + + data_type = params->args[0]->data_type; +- if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) ++ if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) + { + struct vkd3d_string_buffer *string; + +@@ -2942,7 +2953,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, + } + + data_type = params->args[0]->data_type; +- if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) ++ if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) + { + struct vkd3d_string_buffer *string; + +@@ -3022,6 +3033,46 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_COS, arg, loc); } @@ -7802,7 +11268,16 @@ index cd05fd008a6..0c196b77595 100644 static bool intrinsic_cross(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -3155,6 +3199,94 @@ static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, +@@ -3031,7 +3082,7 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, + struct hlsl_type *cast_type; + enum hlsl_base_type base; + +- if (arg1->data_type->base_type == HLSL_TYPE_HALF && arg2->data_type->base_type == HLSL_TYPE_HALF) ++ if (arg1->data_type->e.numeric.type == HLSL_TYPE_HALF && arg2->data_type->e.numeric.type == HLSL_TYPE_HALF) + base = HLSL_TYPE_HALF; + else + base = HLSL_TYPE_FLOAT; +@@ -3155,6 +3206,94 @@ static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); } @@ -7863,7 +11338,7 @@ index cd05fd008a6..0c196b77595 100644 + return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); + } + -+ typename = type->base_type == HLSL_TYPE_HALF ? 
"half" : "float"; ++ typename = type->e.numeric.type == HLSL_TYPE_HALF ? "half" : "float"; + template = templates[dim]; + + switch (dim) @@ -7897,7 +11372,16 @@ index cd05fd008a6..0c196b77595 100644 static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -3646,6 +3778,59 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, +@@ -3478,7 +3617,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1], *cast1, *cast2; +- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); ++ enum hlsl_base_type base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); + struct hlsl_type *cast_type1 = arg1->data_type, *cast_type2 = arg2->data_type, *matrix_type, *ret_type; + unsigned int i, j, k, vect_count = 0; + struct hlsl_deref var_deref; +@@ -3646,6 +3785,59 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, i, neg, loc); } @@ -7936,7 +11420,7 @@ index cd05fd008a6..0c196b77595 100644 + if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) + return false; + -+ base = expr_common_base_type(res_type->base_type, i_type->base_type); ++ base = expr_common_base_type(res_type->e.numeric.type, i_type->e.numeric.type); + base = base == HLSL_TYPE_HALF ? 
HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; + res_type = convert_numeric_type(ctx, res_type, base); + idx_type = convert_numeric_type(ctx, i_type, base); @@ -7957,7 +11441,16 @@ index cd05fd008a6..0c196b77595 100644 static bool intrinsic_round(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -3726,6 +3911,12 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, +@@ -3688,7 +3880,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, + struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, + arg->data_type->dimx, arg->data_type->dimy); + +- if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) ++ if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc))) + return false; + hlsl_block_add_instr(params->instrs, zero); + +@@ -3726,6 +3918,12 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); } @@ -7970,7 +11463,7 @@ index cd05fd008a6..0c196b77595 100644 /* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -@@ -3798,6 +3989,39 @@ static bool intrinsic_tan(struct hlsl_ctx *ctx, +@@ -3798,6 +3996,39 @@ static bool intrinsic_tan(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, sin, cos, loc); } @@ -8010,7 +11503,7 @@ index cd05fd008a6..0c196b77595 100644 static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) { -@@ -3818,7 +4042,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3818,7 +4049,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, 
const struct parse_initializer * } sampler_type = params->args[0]->data_type; @@ -8019,7 +11512,7 @@ index cd05fd008a6..0c196b77595 100644 || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) { struct vkd3d_string_buffer *string; -@@ -3866,7 +4090,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3866,7 +4097,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * return false; } @@ -8028,7 +11521,7 @@ index cd05fd008a6..0c196b77595 100644 { unsigned int count = hlsl_sampler_dim_count(dim); struct hlsl_ir_node *divisor; -@@ -3913,7 +4137,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3913,7 +4144,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * return false; initialize_var_components(ctx, params->instrs, var, &idx, coords); @@ -8037,7 +11530,16 @@ index cd05fd008a6..0c196b77595 100644 { if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) return false; -@@ -4099,7 +4323,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, +@@ -4022,7 +4253,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + return true; + } + +- mat_type = hlsl_get_matrix_type(ctx, arg_type->base_type, arg_type->dimy, arg_type->dimx); ++ mat_type = hlsl_get_matrix_type(ctx, arg_type->e.numeric.type, arg_type->dimy, arg_type->dimx); + + if (!(var = hlsl_new_synthetic_var(ctx, "transpose", mat_type, loc))) + return false; +@@ -4099,7 +4330,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) return false; @@ -8046,7 +11548,7 @@ index cd05fd008a6..0c196b77595 100644 return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); return true; -@@ -4130,6 +4354,7 @@ intrinsic_functions[] = +@@ -4130,6 +4361,7 @@ intrinsic_functions[] = {"clamp", 3, true, 
intrinsic_clamp}, {"clip", 1, true, intrinsic_clip}, {"cos", 1, true, intrinsic_cos}, @@ -8054,7 +11556,7 @@ index cd05fd008a6..0c196b77595 100644 {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, -@@ -4138,6 +4363,7 @@ intrinsic_functions[] = +@@ -4138,6 +4370,7 @@ intrinsic_functions[] = {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, {"ddy_fine", 1, true, intrinsic_ddy_fine}, {"degrees", 1, true, intrinsic_degrees}, @@ -8062,7 +11564,7 @@ index cd05fd008a6..0c196b77595 100644 {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, -@@ -4160,15 +4386,18 @@ intrinsic_functions[] = +@@ -4160,15 +4393,18 @@ intrinsic_functions[] = {"pow", 2, true, intrinsic_pow}, {"radians", 1, true, intrinsic_radians}, {"reflect", 2, true, intrinsic_reflect}, @@ -8081,7 +11583,7 @@ index cd05fd008a6..0c196b77595 100644 {"tex1D", -1, false, intrinsic_tex1D}, {"tex2D", -1, false, intrinsic_tex2D}, {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, -@@ -4263,22 +4492,7 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type +@@ -4263,22 +4499,7 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type return NULL; for (i = 0; i < params->args_count; ++i) @@ -8105,7 +11607,7 @@ index cd05fd008a6..0c196b77595 100644 if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; -@@ -4318,26 +4532,34 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -4318,26 +4539,34 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc))) return false; } @@ -8137,7 +11639,7 @@ index cd05fd008a6..0c196b77595 100644 + if (common_type->dimx == 1 && common_type->dimy == 1) + { + common_type = hlsl_get_numeric_type(ctx, cond_type->class, -+ common_type->base_type, cond_type->dimx, cond_type->dimy); ++ 
common_type->e.numeric.type, cond_type->dimx, cond_type->dimy); + } + else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) + { @@ -8158,7 +11660,7 @@ index cd05fd008a6..0c196b77595 100644 } if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) -@@ -4362,9 +4584,16 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -4362,9 +4591,16 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_release_string_buffer(ctx, second_string); } @@ -8170,12 +11672,12 @@ index cd05fd008a6..0c196b77595 100644 common_type = first->data_type; } -+ assert(cond->data_type->base_type == HLSL_TYPE_BOOL); ++ assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); + args[0] = cond; args[1] = first; args[2] = second; -@@ -4490,8 +4719,7 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc +@@ -4490,8 +4726,7 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc } sampler_type = params->args[0]->data_type; @@ -8185,7 +11687,7 @@ index cd05fd008a6..0c196b77595 100644 { struct vkd3d_string_buffer *string; -@@ -4555,8 +4783,7 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * +@@ -4555,8 +4790,7 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block * } sampler_type = params->args[0]->data_type; @@ -8195,7 +11697,7 @@ index cd05fd008a6..0c196b77595 100644 { struct vkd3d_string_buffer *string; -@@ -4666,8 +4893,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc +@@ -4666,8 +4900,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc } sampler_type = params->args[0]->data_type; @@ -8205,7 +11707,16 @@ index cd05fd008a6..0c196b77595 100644 { struct vkd3d_string_buffer *string; -@@ -4903,8 +5129,7 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block * +@@ -4689,7 
+4922,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + +- load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->base_type, 4); ++ load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4); + load_params.resource = object; + load_params.sampler = params->args[0]; + +@@ -4903,8 +5136,7 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block * } sampler_type = params->args[0]->data_type; @@ -8215,7 +11726,7 @@ index cd05fd008a6..0c196b77595 100644 { struct vkd3d_string_buffer *string; -@@ -4966,8 +5191,7 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block +@@ -4966,8 +5198,7 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block } sampler_type = params->args[0]->data_type; @@ -8225,7 +11736,7 @@ index cd05fd008a6..0c196b77595 100644 { struct vkd3d_string_buffer *string; -@@ -5051,8 +5275,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru +@@ -5051,8 +5282,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru const struct hlsl_type *object_type = object->data_type; const struct method_function *method; @@ -8235,7 +11746,7 @@ index cd05fd008a6..0c196b77595 100644 { struct vkd3d_string_buffer *string; -@@ -5193,6 +5416,16 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -5193,6 +5423,16 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, hlsl_release_string_buffer(ctx, string); } @@ -8252,7 +11763,7 @@ index cd05fd008a6..0c196b77595 100644 } %locations -@@ -5233,6 +5466,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -5233,6 +5473,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct 
parse_attribute_list attr_list; struct hlsl_ir_switch_case *switch_case; struct hlsl_scope *scope; @@ -8261,7 +11772,7 @@ index cd05fd008a6..0c196b77595 100644 } %token KW_BLENDSTATE -@@ -5243,6 +5478,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -5243,6 +5485,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_CENTROID %token KW_COLUMN_MAJOR %token KW_COMPILE @@ -8269,7 +11780,7 @@ index cd05fd008a6..0c196b77595 100644 %token KW_CONST %token KW_CONTINUE %token KW_DEFAULT -@@ -5250,14 +5486,17 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -5250,14 +5493,17 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_DEPTHSTENCILVIEW %token KW_DISCARD %token KW_DO @@ -8287,7 +11798,7 @@ index cd05fd008a6..0c196b77595 100644 %token KW_IF %token KW_IN %token KW_INLINE -@@ -5271,7 +5510,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -5271,7 +5517,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_PACKOFFSET %token KW_PASS %token KW_PIXELSHADER @@ -8295,7 +11806,7 @@ index cd05fd008a6..0c196b77595 100644 %token KW_RASTERIZERORDEREDBUFFER %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER %token KW_RASTERIZERORDEREDTEXTURE1D -@@ -5429,6 +5667,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -5429,6 +5674,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type any_identifier %type var_identifier @@ -8303,7 +11814,16 @@ index cd05fd008a6..0c196b77595 100644 %type name_opt %type parameter -@@ -5443,6 +5682,10 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -5436,13 +5682,17 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %type param_list + %type parameters + +-%type register_opt +-%type packoffset_opt 
++%type register_reservation ++%type packoffset_reservation + + %type texture_type texture_ms_type uav_type rov_type %type semantic @@ -8314,7 +11834,7 @@ index cd05fd008a6..0c196b77595 100644 %type switch_case %type field_type -@@ -5453,6 +5696,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -5453,6 +5703,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type type_no_void %type typedef_type @@ -8322,7 +11842,7 @@ index cd05fd008a6..0c196b77595 100644 %type type_spec %type variable_decl %type variable_def -@@ -5483,9 +5727,9 @@ name_opt: +@@ -5483,9 +5734,9 @@ name_opt: | any_identifier pass: @@ -8334,7 +11854,7 @@ index cd05fd008a6..0c196b77595 100644 YYABORT; } -@@ -5535,10 +5779,6 @@ technique10: +@@ -5535,10 +5786,6 @@ technique10: struct hlsl_scope *scope = ctx->cur_scope; hlsl_pop_scope(ctx); @@ -8345,7 +11865,7 @@ index cd05fd008a6..0c196b77595 100644 if (!add_technique(ctx, $2, scope, $3, "technique10", &@1)) YYABORT; } -@@ -5580,12 +5820,12 @@ effect_group: +@@ -5580,12 +5827,12 @@ effect_group: } buffer_declaration: @@ -8362,7 +11882,20 @@ index cd05fd008a6..0c196b77595 100644 YYABORT; } -@@ -5884,9 +6124,9 @@ func_prototype_no_attrs: +@@ -5792,11 +6039,7 @@ attribute_list: + $$ = $1; + if (!(new_array = vkd3d_realloc($$.attrs, ($$.count + 1) * sizeof(*$$.attrs)))) + { +- unsigned int i; +- +- for (i = 0; i < $$.count; ++i) +- hlsl_free_attribute((void *)$$.attrs[i]); +- vkd3d_free($$.attrs); ++ cleanup_parse_attribute_list(&$$); + YYABORT; + } + $$.attrs = new_array; +@@ -5884,9 +6127,9 @@ func_prototype_no_attrs: /* Functions are unconditionally inlined. 
*/ modifiers &= ~HLSL_MODIFIER_INLINE; @@ -8374,7 +11907,116 @@ index cd05fd008a6..0c196b77595 100644 if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) YYABORT; if ((var = hlsl_get_var(ctx->globals, $3))) -@@ -6388,7 +6628,7 @@ type_no_void: +@@ -6002,11 +6245,7 @@ func_prototype: + } + else + { +- unsigned int i; +- +- for (i = 0; i < $1.count; ++i) +- hlsl_free_attribute((void *)$1.attrs[i]); +- vkd3d_free($1.attrs); ++ cleanup_parse_attribute_list(&$1); + } + $$ = $2; + } +@@ -6060,12 +6299,12 @@ colon_attribute: + $$.reg_reservation.reg_type = 0; + $$.reg_reservation.offset_type = 0; + } +- | register_opt ++ | register_reservation + { + $$.semantic = (struct hlsl_semantic){0}; + $$.reg_reservation = $1; + } +- | packoffset_opt ++ | packoffset_reservation + { + $$.semantic = (struct hlsl_semantic){0}; + $$.reg_reservation = $1; +@@ -6087,22 +6326,57 @@ semantic: + } + + /* FIXME: Writemasks */ +-register_opt: ++register_reservation: + ':' KW_REGISTER '(' any_identifier ')' + { +- $$ = parse_reg_reservation($4); ++ memset(&$$, 0, sizeof($$)); ++ if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) ++ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $4); ++ + vkd3d_free($4); + } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' + { +- FIXME("Ignoring shader target %s in a register reservation.\n", debugstr_a($4)); ++ memset(&$$, 0, sizeof($$)); ++ if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) ++ { ++ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); ++ } ++ else if (parse_reservation_space($6, &$$.reg_space)) ++ { ++ if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) ++ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $4); ++ } ++ else ++ { ++ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register or space reservation '%s'.", $6); ++ } ++ + 
vkd3d_free($4); ++ vkd3d_free($6); ++ } ++ | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' ++ { ++ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); ++ ++ memset(&$$, 0, sizeof($$)); ++ if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) ++ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $6); ++ ++ if (!parse_reservation_space($8, &$$.reg_space)) ++ hlsl_error(ctx, &@8, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register space reservation '%s'.", $8); + +- $$ = parse_reg_reservation($6); ++ vkd3d_free($4); + vkd3d_free($6); ++ vkd3d_free($8); + } + +-packoffset_opt: ++packoffset_reservation: + ':' KW_PACKOFFSET '(' any_identifier ')' + { + $$ = parse_packoffset(ctx, $4, NULL, &@$); +@@ -6307,7 +6581,7 @@ type_no_void: + YYABORT; + } + +- $$ = hlsl_type_clone(ctx, hlsl_get_vector_type(ctx, $3->base_type, $5), 0, 0); ++ $$ = hlsl_type_clone(ctx, hlsl_get_vector_type(ctx, $3->e.numeric.type, $5), 0, 0); + $$->is_minimum_precision = $3->is_minimum_precision; + } + | KW_VECTOR +@@ -6340,7 +6614,7 @@ type_no_void: + YYABORT; + } + +- $$ = hlsl_type_clone(ctx, hlsl_get_matrix_type(ctx, $3->base_type, $7, $5), 0, 0); ++ $$ = hlsl_type_clone(ctx, hlsl_get_matrix_type(ctx, $3->e.numeric.type, $7, $5), 0, 0); + $$->is_minimum_precision = $3->is_minimum_precision; + } + | KW_MATRIX +@@ -6388,7 +6662,7 @@ type_no_void: { validate_texture_format_type(ctx, $3, &@3); @@ -8383,7 +12025,7 @@ index cd05fd008a6..0c196b77595 100644 { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); -@@ -6427,7 +6667,7 @@ type_no_void: +@@ -6427,7 +6701,7 @@ type_no_void: $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); if ($$->is_minimum_precision) { @@ -8392,7 +12034,7 @@ index cd05fd008a6..0c196b77595 100644 { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, 
"Target profile doesn't support minimum-precision types."); -@@ -6454,6 +6694,14 @@ type_no_void: +@@ -6454,6 +6728,14 @@ type_no_void: { $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); } @@ -8407,7 +12049,7 @@ index cd05fd008a6..0c196b77595 100644 type: type_no_void -@@ -6583,22 +6831,91 @@ variable_decl: +@@ -6583,22 +6865,97 @@ variable_decl: $$->reg_reservation = $3.reg_reservation; } @@ -8467,6 +12109,7 @@ index cd05fd008a6..0c196b77595 100644 + | state_block stateblock_lhs_identifier state_block_index_opt '=' complex_initializer ';' + { + struct hlsl_state_block_entry *entry; ++ unsigned int i; + + if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) + YYABORT; @@ -8476,8 +12119,13 @@ index cd05fd008a6..0c196b77595 100644 + entry->lhs_index = $3.index; + + entry->instrs = $5.instrs; -+ entry->args = $5.args; ++ + entry->args_count = $5.args_count; ++ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) ++ YYABORT; ++ for (i = 0; i < entry->args_count; ++i) ++ hlsl_src_from_node(&entry->args[i], $5.args[i]); ++ vkd3d_free($5.args); + + $$ = $1; + state_block_add_entry($$, entry); @@ -8506,7 +12154,7 @@ index cd05fd008a6..0c196b77595 100644 variable_def: variable_decl -@@ -6611,6 +6928,24 @@ variable_def: +@@ -6611,6 +6968,24 @@ variable_def: { $$ = $1; ctx->in_state_block = 0; @@ -8531,7 +12179,7 @@ index cd05fd008a6..0c196b77595 100644 } variable_def_typed: -@@ -6727,10 +7062,6 @@ var_modifiers: +@@ -6727,10 +7102,6 @@ var_modifiers: { $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOPERSPECTIVE, &@1); } @@ -8542,7 +12190,7 @@ index cd05fd008a6..0c196b77595 100644 | KW_SHARED var_modifiers { $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); -@@ -6779,7 +7110,20 @@ var_modifiers: +@@ -6779,7 +7150,20 @@ var_modifiers: { $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); } @@ -8564,7 +12212,78 @@ index cd05fd008a6..0c196b77595 100644 complex_initializer: initializer_expr -@@ -7227,15 +7571,13 @@ primary_expr: +@@ 
-6978,6 +7362,7 @@ selection_statement: + { + destroy_block($6.then_block); + destroy_block($6.else_block); ++ cleanup_parse_attribute_list(&$1); + YYABORT; + } + +@@ -6985,10 +7370,12 @@ selection_statement: + { + destroy_block($6.then_block); + destroy_block($6.else_block); ++ cleanup_parse_attribute_list(&$1); + YYABORT; + } + destroy_block($6.then_block); + destroy_block($6.else_block); ++ cleanup_parse_attribute_list(&$1); + + $$ = $4; + hlsl_block_add_instr($$, instr); +@@ -7011,21 +7398,25 @@ loop_statement: + { + $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); + hlsl_pop_scope(ctx); ++ cleanup_parse_attribute_list(&$1); + } + | attribute_list_optional loop_scope_start KW_DO statement KW_WHILE '(' expr ')' ';' + { + $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); + hlsl_pop_scope(ctx); ++ cleanup_parse_attribute_list(&$1); + } + | attribute_list_optional loop_scope_start KW_FOR '(' expr_statement expr_statement expr_optional ')' statement + { + $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); + hlsl_pop_scope(ctx); ++ cleanup_parse_attribute_list(&$1); + } + | attribute_list_optional loop_scope_start KW_FOR '(' declaration expr_statement expr_optional ')' statement + { + $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); + hlsl_pop_scope(ctx); ++ cleanup_parse_attribute_list(&$1); + } + + switch_statement: +@@ -7038,6 +7429,7 @@ switch_statement: + { + destroy_switch_cases($8); + destroy_block($5); ++ cleanup_parse_attribute_list(&$1); + YYABORT; + } + +@@ -7048,6 +7440,7 @@ switch_statement: + if (!s) + { + destroy_block($5); ++ cleanup_parse_attribute_list(&$1); + YYABORT; + } + +@@ -7055,6 +7448,7 @@ switch_statement: + hlsl_block_add_instr($$, s); + + hlsl_pop_scope(ctx); ++ cleanup_parse_attribute_list(&$1); + } + + switch_case: +@@ -7227,15 +7621,13 @@ primary_expr: { if (ctx->in_state_block) { @@ -8586,9 +12305,33 @@ index cd05fd008a6..0c196b77595 100644 } else diff --git 
a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 307f86f55b7..f6cccfe8bea 100644 +index 307f86f55b7..27f16af51c5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -263,8 +263,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls + if (type1->dimx != type2->dimx) + return false; + +- return base_type_get_semantic_equivalent(type1->base_type) +- == base_type_get_semantic_equivalent(type2->base_type); ++ return base_type_get_semantic_equivalent(type1->e.numeric.type) ++ == base_type_get_semantic_equivalent(type2->e.numeric.type); + } + + static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, +@@ -355,10 +355,10 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + if (!semantic->name) + return; + +- vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); ++ vector_type_dst = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); + vector_type_src = vector_type_dst; + if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) +- vector_type_src = hlsl_get_vector_type(ctx, type->base_type, 4); ++ vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); + + for (i = 0; i < hlsl_type_major_size(type); ++i) + { @@ -427,7 +427,10 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * { field = &type->e.record.fields[i]; @@ -8600,6 +12343,33 @@ index 307f86f55b7..f6cccfe8bea 100644 validate_field_semantic(ctx, field); semantic = &field->semantic; elem_semantic_index = semantic->index; +@@ -497,7 +500,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + if (!semantic->name) + return; + +- vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); ++ vector_type = 
hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); + + for (i = 0; i < hlsl_type_major_size(type); ++i) + { +@@ -1098,7 +1101,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_ir_node *resource_load; + + assert(coords->data_type->class == HLSL_CLASS_VECTOR); +- assert(coords->data_type->base_type == HLSL_TYPE_UINT); ++ assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); + assert(coords->data_type->dimx == dim_count); + + if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) +@@ -1188,7 +1191,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s + { + struct hlsl_ir_node *new_cast, *swizzle; + +- dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->base_type); ++ dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->e.numeric.type); + /* We need to preserve the cast since it might be doing more than just + * turning the scalar into a vector. */ + if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_scalar_type, &cast->node.loc))) @@ -1562,7 +1565,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); @@ -8609,28 +12379,36 @@ index 307f86f55b7..f6cccfe8bea 100644 { struct hlsl_ir_node *swizzle_node; -@@ -1622,6 +1625,9 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, +@@ -1622,7 +1625,11 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: +- case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: - case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_VERTEX_SHADER: break; -@@ -1631,6 +1637,10 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + case HLSL_CLASS_MATRIX: +@@ -1631,6 
+1638,15 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, /* FIXME: Actually we shouldn't even get here, but we don't split * matrices yet. */ return false; + ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: + vkd3d_unreachable(); } if (copy_propagation_replace_with_constant_vector(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node)) -@@ -1739,7 +1749,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s +@@ -1739,7 +1755,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s { unsigned int writemask = store->writemask; @@ -8639,7 +12417,43 @@ index 307f86f55b7..f6cccfe8bea 100644 writemask = VKD3DSP_WRITEMASK_0; copy_propagation_set_value(ctx, var_def, start, writemask, store->rhs.node, store->node.index); } -@@ -2603,8 +2613,8 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in +@@ -2049,7 +2065,7 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + src_type = expr->operands[0].node->data_type; + + if (hlsl_types_are_equal(src_type, dst_type) +- || (src_type->base_type == dst_type->base_type && is_vec1(src_type) && is_vec1(dst_type))) ++ || (src_type->e.numeric.type == dst_type->e.numeric.type && is_vec1(src_type) && is_vec1(dst_type))) + { + hlsl_replace_node(&expr->node, expr->operands[0].node); + return true; +@@ -2176,7 +2192,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + type = rhs->data_type; + if (type->class != HLSL_CLASS_MATRIX) + return false; +- element_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); ++ element_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); + + if (rhs->type != HLSL_IR_LOAD) + { +@@ -2213,7 +2229,7 @@ static bool 
lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + { + struct hlsl_ir_node *new_cast, *swizzle; + +- dst_vector_type = hlsl_get_vector_type(ctx, dst_type->base_type, src_type->dimx); ++ dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->dimx); + /* We need to preserve the cast since it might be doing more than just + * narrowing the vector. */ + if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) +@@ -2467,7 +2483,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir + + op = HLSL_OP2_DOT; + if (type->dimx == 1) +- op = type->base_type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; ++ op = type->e.numeric.type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; + + /* Note: We may be creating a DOT for bool vectors here, which we need to lower to + * LOGIC_OR + LOGIC_AND. */ +@@ -2603,8 +2619,8 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in hlsl_copy_deref(ctx, &load->sampler, &load->resource); load->resource.var = var; @@ -8650,7 +12464,7 @@ index 307f86f55b7..f6cccfe8bea 100644 return true; } -@@ -2647,10 +2657,11 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) +@@ -2647,10 +2663,11 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) return false; } @@ -8664,14 +12478,21 @@ index 307f86f55b7..f6cccfe8bea 100644 struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) -@@ -2665,17 +2676,15 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - if (arg->data_type->base_type != HLSL_TYPE_FLOAT && arg->data_type->base_type != HLSL_TYPE_HALF) +@@ -2660,22 +2677,20 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, return false; + arg = expr->operands[0].node; +- if (instr->data_type->base_type != HLSL_TYPE_INT && instr->data_type->base_type != HLSL_TYPE_UINT) +- return false; +- 
if (arg->data_type->base_type != HLSL_TYPE_FLOAT && arg->data_type->base_type != HLSL_TYPE_HALF) ++ if (instr->data_type->e.numeric.type != HLSL_TYPE_INT && instr->data_type->e.numeric.type != HLSL_TYPE_UINT) + return false; +- - /* Check that the argument is not already a FLOOR */ - if (arg->type == HLSL_IR_EXPR && hlsl_ir_expr(arg)->op == HLSL_OP1_FLOOR) -- return false; -- ++ if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF) + return false; + if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc))) return false; hlsl_block_add_instr(block, floor); @@ -8686,7 +12507,7 @@ index 307f86f55b7..f6cccfe8bea 100644 return true; } -@@ -2903,12 +2912,60 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct +@@ -2903,12 +2918,60 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct return true; } @@ -8709,7 +12530,7 @@ index 307f86f55b7..f6cccfe8bea 100644 + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); + + /* If this is happens, it means we failed to cast the argument to boolean somewhere. 
*/ -+ assert(arg->data_type->base_type == HLSL_TYPE_BOOL); ++ assert(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); + + if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &arg->loc))) + return false; @@ -8750,12 +12571,12 @@ index 307f86f55b7..f6cccfe8bea 100644 struct hlsl_ir_expr *expr; struct hlsl_type *type; -@@ -2929,55 +2986,282 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru +@@ -2929,55 +2992,282 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru return false; } - if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ assert(cond->data_type->base_type == HLSL_TYPE_BOOL); ++ assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); + + type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, + instr->data_type->dimx, instr->data_type->dimy); @@ -8955,12 +12776,7 @@ index 307f86f55b7..f6cccfe8bea 100644 + if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg1_cast); - -- memset(operands, 0, sizeof(operands)); -- operands[0] = cond; -- operands[1] = first; -- operands[2] = second; -- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc))) ++ + if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg2_cast); @@ -8972,7 +12788,12 @@ index 307f86f55b7..f6cccfe8bea 100644 + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) + return false; + hlsl_block_add_instr(block, sub); -+ + +- memset(operands, 0, sizeof(operands)); +- operands[0] = cond; +- operands[1] = first; +- operands[2] = second; +- if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc))) + memset(&zero_value, 0, sizeof(zero_value)); + if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) + return false; @@ -9067,7 +12888,16 @@ 
index 307f86f55b7..f6cccfe8bea 100644 return true; } -@@ -3018,11 +3302,21 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -2996,7 +3286,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + arg_type = expr->operands[0].node->data_type; + if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) + return false; +- if (type->base_type != HLSL_TYPE_BOOL) ++ if (type->e.numeric.type != HLSL_TYPE_BOOL) + return false; + + /* Narrowing casts should have already been lowered. */ +@@ -3018,11 +3308,21 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { @@ -9077,7 +12907,7 @@ index 307f86f55b7..f6cccfe8bea 100644 assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); -+ if (cond_type->base_type != HLSL_TYPE_BOOL) ++ if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) + { + cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy); + @@ -9089,7 +12919,61 @@ index 307f86f55b7..f6cccfe8bea 100644 operands[0] = condition; operands[1] = if_true; operands[2] = if_false; -@@ -3308,6 +3602,63 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -3050,7 +3350,7 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + return false; + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) + return false; +- if (type->base_type != HLSL_TYPE_INT) ++ if (type->e.numeric.type != HLSL_TYPE_INT) + return false; + utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); + +@@ -3116,7 +3416,7 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + return false; + if (type->class != 
HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) + return false; +- if (type->base_type != HLSL_TYPE_INT) ++ if (type->e.numeric.type != HLSL_TYPE_INT) + return false; + utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); + +@@ -3175,7 +3475,7 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + return false; + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) + return false; +- if (type->base_type != HLSL_TYPE_INT) ++ if (type->e.numeric.type != HLSL_TYPE_INT) + return false; + + arg = expr->operands[0].node; +@@ -3206,14 +3506,14 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + if (expr->op != HLSL_OP2_DOT) + return false; + +- if (type->base_type == HLSL_TYPE_INT || type->base_type == HLSL_TYPE_UINT +- || type->base_type == HLSL_TYPE_BOOL) ++ if (type->e.numeric.type == HLSL_TYPE_INT || type->e.numeric.type == HLSL_TYPE_UINT ++ || type->e.numeric.type == HLSL_TYPE_BOOL) + { + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + assert(arg1->data_type->dimx == arg2->data_type->dimx); + dimx = arg1->data_type->dimx; +- is_bool = type->base_type == HLSL_TYPE_BOOL; ++ is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; + + if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? 
HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) + return false; +@@ -3259,7 +3559,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + return false; + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) + return false; +- if (type->base_type != HLSL_TYPE_FLOAT) ++ if (type->e.numeric.type != HLSL_TYPE_FLOAT) + return false; + btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); + +@@ -3308,6 +3608,63 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return true; } @@ -9100,7 +12984,7 @@ index 307f86f55b7..f6cccfe8bea 100644 + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); -+ if (expr->op == HLSL_OP1_CAST || instr->data_type->base_type == HLSL_TYPE_FLOAT) ++ if (expr->op == HLSL_OP1_CAST || instr->data_type->e.numeric.type == HLSL_TYPE_FLOAT) + return false; + + switch (expr->op) @@ -9153,7 +13037,7 @@ index 307f86f55b7..f6cccfe8bea 100644 static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; -@@ -3402,6 +3753,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -3402,6 +3759,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_RESOURCE_STORE: case HLSL_IR_SWITCH: break; @@ -9163,7 +13047,7 @@ index 307f86f55b7..f6cccfe8bea 100644 } return false; -@@ -3457,9 +3811,6 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) +@@ -3457,9 +3817,6 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) { unsigned int r; @@ -9173,7 +13057,7 @@ index 307f86f55b7..f6cccfe8bea 100644 if (var->reg_reservation.reg_type) { for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) -@@ -3493,6 +3844,22 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) +@@ -3493,6 +3850,22 @@ static void 
allocate_register_reservations(struct hlsl_ctx *ctx) } } @@ -9196,7 +13080,7 @@ index 307f86f55b7..f6cccfe8bea 100644 /* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend * to at least the range of the entire loop. We also do this for nodes, so that -@@ -3512,6 +3879,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -3512,6 +3885,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_CALL: /* We should have inlined all calls before computing liveness. */ vkd3d_unreachable(); @@ -9206,7 +13090,7 @@ index 307f86f55b7..f6cccfe8bea 100644 case HLSL_IR_STORE: { -@@ -3521,8 +3891,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -3521,8 +3897,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop if (!var->first_write) var->first_write = loop_first ? 
min(instr->index, loop_first) : instr->index; store->rhs.node->last_read = last_read; @@ -9216,7 +13100,7 @@ index 307f86f55b7..f6cccfe8bea 100644 break; } case HLSL_IR_EXPR: -@@ -3549,8 +3918,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -3549,8 +3924,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = load->src.var; var->last_read = max(var->last_read, last_read); @@ -9226,7 +13110,7 @@ index 307f86f55b7..f6cccfe8bea 100644 break; } case HLSL_IR_LOOP: -@@ -3567,14 +3935,12 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -3567,14 +3941,12 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = load->resource.var; var->last_read = max(var->last_read, last_read); @@ -9243,7 +13127,7 @@ index 307f86f55b7..f6cccfe8bea 100644 } if (load->coords.node) -@@ -3599,8 +3965,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -3599,8 +3971,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = store->resource.var; var->last_read = max(var->last_read, last_read); @@ -9253,7 +13137,177 @@ index 307f86f55b7..f6cccfe8bea 100644 store->coords.node->last_read = last_read; store->value.node->last_read = last_read; break; -@@ -4435,7 +4800,9 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) +@@ -3877,34 +4248,67 @@ static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hls + return false; + } + +-static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++static void register_deref_usage(struct hlsl_ctx *ctx, struct hlsl_deref *deref) + { +- struct hlsl_ir_resource_load *load; +- struct hlsl_ir_var *var; +- enum hlsl_regset regset; ++ struct hlsl_ir_var *var = deref->var; ++ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); ++ uint32_t required_bind_count; ++ 
struct hlsl_type *type; + unsigned int index; + +- if (instr->type != HLSL_IR_RESOURCE_LOAD) +- return false; +- +- load = hlsl_ir_resource_load(instr); +- var = load->resource.var; ++ if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index)) ++ return; + +- regset = hlsl_deref_get_regset(ctx, &load->resource); ++ if (regset <= HLSL_REGSET_LAST_OBJECT) ++ { ++ var->objects_usage[regset][index].used = true; ++ var->bind_count[regset] = max(var->bind_count[regset], index + 1); ++ } ++ else if (regset == HLSL_REGSET_NUMERIC) ++ { ++ type = hlsl_deref_get_type(ctx, deref); + +- if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) +- return false; ++ hlsl_regset_index_from_deref(ctx, deref, regset, &index); ++ required_bind_count = align(index + type->reg_size[regset], 4) / 4; ++ var->bind_count[regset] = max(var->bind_count[regset], required_bind_count); ++ } ++ else ++ { ++ vkd3d_unreachable(); ++ } ++} + +- var->objects_usage[regset][index].used = true; +- var->bind_count[regset] = max(var->bind_count[regset], index + 1); +- if (load->sampler.var) ++static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ switch (instr->type) + { +- var = load->sampler.var; +- if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) +- return false; ++ case HLSL_IR_LOAD: ++ { ++ struct hlsl_ir_load *load = hlsl_ir_load(instr); ++ ++ if (!load->src.var->is_uniform) ++ return false; + +- var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; +- var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); ++ /* These will are handled by validate_static_object_references(). 
*/ ++ if (hlsl_deref_get_regset(ctx, &load->src) != HLSL_REGSET_NUMERIC) ++ return false; ++ ++ register_deref_usage(ctx, &load->src); ++ break; ++ } ++ ++ case HLSL_IR_RESOURCE_LOAD: ++ register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->resource); ++ if (hlsl_ir_resource_load(instr)->sampler.var) ++ register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->sampler); ++ break; ++ ++ case HLSL_IR_RESOURCE_STORE: ++ register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource); ++ break; ++ ++ default: ++ break; + } + + return false; +@@ -4083,7 +4487,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + continue; + value = &constant->value.u[i++]; + +- switch (type->base_type) ++ switch (type->e.numeric.type) + { + case HLSL_TYPE_BOOL: + f = !!value->u; +@@ -4149,16 +4553,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + } + } + ++static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort) ++{ ++ struct hlsl_ir_var *var; ++ ++ list_remove(&to_sort->extern_entry); ++ ++ LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) ++ { ++ uint32_t to_sort_size = to_sort->bind_count[HLSL_REGSET_NUMERIC]; ++ uint32_t var_size = var->bind_count[HLSL_REGSET_NUMERIC]; ++ ++ if (to_sort_size > var_size) ++ { ++ list_add_before(&var->extern_entry, &to_sort->extern_entry); ++ return; ++ } ++ } ++ ++ list_add_tail(sorted, &to_sort->extern_entry); ++} ++ ++static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) ++{ ++ struct list sorted = LIST_INIT(sorted); ++ struct hlsl_ir_var *var, *next; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_uniform) ++ sort_uniform_by_numeric_bind_count(&sorted, var); ++ } ++ list_move_tail(&ctx->extern_vars, &sorted); ++} ++ + static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { + struct register_allocator allocator = 
{0}; + struct hlsl_ir_var *var; + ++ sort_uniforms_by_numeric_bind_count(ctx); ++ + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + +- if (!var->is_uniform || !var->last_read || reg_size == 0) ++ if (!var->is_uniform || reg_size == 0) + continue; + + if (var->reg_reservation.reg_type == 'c') +@@ -4189,15 +4629,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; + +- if (!var->is_uniform || !var->last_read || reg_size == 0) ++ if (!var->is_uniform || alloc_size == 0) + continue; + + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + { +- var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, +- 1, UINT_MAX, var->data_type); ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size); + TRACE("Allocated %s to %s.\n", var->name, + debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + } +@@ -4435,7 +4874,9 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) continue; if (var1->reg_reservation.offset_type @@ -9264,7 +13318,145 @@ index 307f86f55b7..f6cccfe8bea 100644 buffer->manually_packed_elements = true; else buffer->automatically_packed_elements = true; -@@ -4885,25 +5252,6 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a +@@ -4674,7 +5115,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + + /* We should always have generated a cast to UINT. 
*/ + assert(path_node->data_type->class == HLSL_CLASS_SCALAR +- && path_node->data_type->base_type == HLSL_TYPE_UINT); ++ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value.u[0].u; + +@@ -4729,14 +5170,15 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + return true; + } + ++/* Retrieves true if the index is constant, and false otherwise. In the latter case, the maximum ++ * possible index is retrieved, assuming there is not out-of-bounds access. */ + bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index) + { + struct hlsl_type *type = deref->var->data_type; ++ bool index_is_constant = true; + unsigned int i; + +- assert(regset <= HLSL_REGSET_LAST_OBJECT); +- + *index = 0; + + for (i = 0; i < deref->path_len; ++i) +@@ -4745,37 +5187,62 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref + unsigned int idx = 0; + + assert(path_node); +- if (path_node->type != HLSL_IR_CONSTANT) +- return false; ++ if (path_node->type == HLSL_IR_CONSTANT) ++ { ++ /* We should always have generated a cast to UINT. */ ++ assert(path_node->data_type->class == HLSL_CLASS_SCALAR ++ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + +- /* We should always have generated a cast to UINT. 
*/ +- assert(path_node->data_type->class == HLSL_CLASS_SCALAR +- && path_node->data_type->base_type == HLSL_TYPE_UINT); ++ idx = hlsl_ir_constant(path_node)->value.u[0].u; + +- idx = hlsl_ir_constant(path_node)->value.u[0].u; ++ switch (type->class) ++ { ++ case HLSL_CLASS_ARRAY: ++ if (idx >= type->e.array.elements_count) ++ return false; + +- switch (type->class) ++ *index += idx * type->e.array.type->reg_size[regset]; ++ break; ++ ++ case HLSL_CLASS_STRUCT: ++ *index += type->e.record.fields[idx].reg_offset[regset]; ++ break; ++ ++ case HLSL_CLASS_MATRIX: ++ *index += 4 * idx; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ } ++ else + { +- case HLSL_CLASS_ARRAY: +- if (idx >= type->e.array.elements_count) +- return false; ++ index_is_constant = false; + +- *index += idx * type->e.array.type->reg_size[regset]; +- break; ++ switch (type->class) ++ { ++ case HLSL_CLASS_ARRAY: ++ idx = type->e.array.elements_count - 1; ++ *index += idx * type->e.array.type->reg_size[regset]; ++ break; + +- case HLSL_CLASS_STRUCT: +- *index += type->e.record.fields[idx].reg_offset[regset]; +- break; ++ case HLSL_CLASS_MATRIX: ++ idx = hlsl_type_major_size(type) - 1; ++ *index += idx * 4; ++ break; + +- default: +- vkd3d_unreachable(); ++ default: ++ vkd3d_unreachable(); ++ } + } + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + +- assert(type->reg_size[regset] == 1); +- return true; ++ assert(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); ++ assert(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); ++ return index_is_constant; + } + + bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) +@@ -4790,7 +5257,7 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref + { + /* We should always have generated a cast to UINT. 
*/ + assert(offset_node->data_type->class == HLSL_CLASS_SCALAR +- && offset_node->data_type->base_type == HLSL_TYPE_UINT); ++ && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + assert(offset_node->type != HLSL_IR_CONSTANT); + return false; + } +@@ -4857,7 +5324,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a + const struct hlsl_ir_constant *constant; + + if (type->class != HLSL_CLASS_SCALAR +- || (type->base_type != HLSL_TYPE_INT && type->base_type != HLSL_TYPE_UINT)) ++ || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) + { + struct vkd3d_string_buffer *string; + +@@ -4876,8 +5343,8 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a + } + constant = hlsl_ir_constant(instr); + +- if ((type->base_type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) +- || (type->base_type == HLSL_TYPE_UINT && !constant->value.u[0].u)) ++ if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) ++ || (type->e.numeric.type == HLSL_TYPE_UINT && !constant->value.u[0].u)) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, + "Thread count must be a positive integer."); + +@@ -4885,25 +5352,6 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } } @@ -9290,7 +13482,76 @@ index 307f86f55b7..f6cccfe8bea 100644 static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) { struct hlsl_ir_node *instr, *next; -@@ -5011,9 +5359,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -4960,15 +5408,42 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod + } + } + +-void hlsl_prepend_global_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *body) ++void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + { +- struct hlsl_ir_var *var; ++ bool progress; + +- LIST_FOR_EACH_ENTRY(var, 
&ctx->globals->vars, struct hlsl_ir_var, scope_entry) ++ lower_ir(ctx, lower_matrix_swizzles, body); ++ lower_ir(ctx, lower_index_loads, body); ++ ++ lower_ir(ctx, lower_broadcasts, body); ++ while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); ++ do + { +- if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) +- prepend_uniform_copy(ctx, body, var); ++ progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); ++ progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); + } ++ while (progress); ++ hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); ++ ++ lower_ir(ctx, lower_narrowing_casts, body); ++ lower_ir(ctx, lower_int_dot, body); ++ lower_ir(ctx, lower_int_division, body); ++ lower_ir(ctx, lower_int_modulus, body); ++ lower_ir(ctx, lower_int_abs, body); ++ lower_ir(ctx, lower_casts_to_bool, body); ++ lower_ir(ctx, lower_float_modulus, body); ++ hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); ++ ++ do ++ { ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); ++ progress |= hlsl_copy_propagation_execute(ctx, body); ++ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); ++ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); ++ progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); ++ } while (progress); + } + + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, +@@ -4979,7 +5454,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + struct recursive_call_ctx recursive_call_ctx; + struct hlsl_ir_var *var; + unsigned int i; +- bool progress; + + list_move_head(&body->instrs, &ctx->static_initializers.instrs); + +@@ -4999,7 +5473,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct 
hlsl_ir_function_decl *entry + lower_ir(ctx, lower_matrix_swizzles, body); + lower_ir(ctx, lower_index_loads, body); + +- hlsl_prepend_global_uniform_copy(ctx, body); ++ LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) ++ prepend_uniform_copy(ctx, body, var); ++ } + + for (i = 0; i < entry_func->parameters.count; ++i) + { +@@ -5011,9 +5489,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } else { @@ -9300,21 +13561,54 @@ index 307f86f55b7..f6cccfe8bea 100644 if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT && !var->semantic.name) { -@@ -5067,11 +5412,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); +@@ -5056,34 +5531,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + { + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + } +- lower_ir(ctx, lower_broadcasts, body); +- while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); +- do +- { +- progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); +- progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); +- } +- while (progress); +- hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); - lower_ir(ctx, lower_narrowing_casts, body); +- lower_ir(ctx, lower_narrowing_casts, body); - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_int_dot, body); - lower_ir(ctx, lower_int_division, body); - lower_ir(ctx, lower_int_modulus, body); - lower_ir(ctx, lower_int_abs, body); -+ lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_float_modulus, body); - hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - do -@@ -5098,9 +5443,15 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, track_object_components_usage, body, 
NULL); +- lower_ir(ctx, lower_int_dot, body); +- lower_ir(ctx, lower_int_division, body); +- lower_ir(ctx, lower_int_modulus, body); +- lower_ir(ctx, lower_int_abs, body); +- lower_ir(ctx, lower_float_modulus, body); +- hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); +- do +- { +- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); +- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); +- progress |= hlsl_copy_propagation_execute(ctx, body); +- progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); +- progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); +- progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); +- } +- while (progress); ++ hlsl_run_const_passes(ctx, body); ++ + remove_unreachable_code(ctx, body); + hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL); + +@@ -5095,12 +5545,23 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); + if (profile->major_version >= 4) + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); +- hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); ++ ++ do ++ compute_liveness(ctx, entry_func); ++ while (hlsl_transform_ir(ctx, dce, body, NULL)); ++ ++ hlsl_transform_ir(ctx, track_components_usage, body, NULL); sort_synthetic_separated_samplers_first(ctx); - lower_ir(ctx, lower_ternary, body); @@ -9330,7 +13624,7 @@ index 307f86f55b7..f6cccfe8bea 100644 lower_ir(ctx, lower_casts_to_int, body); lower_ir(ctx, lower_division, body); lower_ir(ctx, lower_sqrt, body); -@@ -5108,6 +5459,12 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -5108,6 +5569,12 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_round, body); lower_ir(ctx, lower_ceil, body); lower_ir(ctx, lower_floor, body); @@ -9343,7 
+13637,7 @@ index 307f86f55b7..f6cccfe8bea 100644 } if (profile->major_version < 2) -@@ -5117,6 +5474,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -5117,6 +5584,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, validate_nonconstant_vector_store_derefs, body); @@ -9355,14 +13649,455 @@ index 307f86f55b7..f6cccfe8bea 100644 transform_derefs(ctx, replace_deref_path_with_offset, body); while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index b76b1fce507..4cea98e9286 100644 +index b76b1fce507..16015fa8a81 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -1177,30 +1177,11 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, +@@ -25,10 +25,10 @@ + static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; - assert(dst_type->base_type == src2->node.data_type->base_type); - assert(dst_type->base_type == src3->node.data_type->base_type); -+ assert(src1->node.data_type->base_type == HLSL_TYPE_BOOL); +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -116,10 +116,10 @@ static int32_t double_to_int(double x) + static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == 
src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -158,7 +158,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + for (k = 0; k < dst_type->dimx; ++k) + { +- switch (src->node.data_type->base_type) ++ switch (src->node.data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +@@ -200,7 +200,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + vkd3d_unreachable(); + } + +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +@@ -231,10 +231,10 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -257,10 +257,10 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -283,10 +283,10 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +- enum 
hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -309,11 +309,11 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + float i; + +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -336,10 +336,10 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -381,10 +381,10 @@ static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con + static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -415,10 +415,10 @@ static bool fold_neg(struct hlsl_ctx *ctx, 
struct hlsl_constant_value *dst, + static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -439,10 +439,10 @@ static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -484,10 +484,10 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -524,10 +524,10 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; 
+ +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -550,10 +550,10 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->base_type); ++ assert(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -595,11 +595,11 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con + static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->e.numeric.type); ++ assert(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -632,11 +632,11 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(type == 
src1->node.data_type->e.numeric.type); ++ assert(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -659,11 +659,11 @@ static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->e.numeric.type); ++ assert(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -686,11 +686,11 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const + static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->e.numeric.type); ++ assert(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -712,11 +712,11 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- 
assert(type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->e.numeric.type); ++ assert(type == src2->node.data_type->e.numeric.type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + + dst->u[0].f = 0.0f; +@@ -740,12 +740,12 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); +- assert(type == src3->node.data_type->base_type); ++ assert(type == src1->node.data_type->e.numeric.type); ++ assert(type == src2->node.data_type->e.numeric.type); ++ assert(type == src3->node.data_type->e.numeric.type); + assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + assert(src3->node.data_type->dimx == 1); + +@@ -771,11 +771,11 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->e.numeric.type); ++ assert(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -841,12 +841,12 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co + { + unsigned int k; + +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- assert(src1->node.data_type->base_type == 
src2->node.data_type->base_type); ++ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +- switch (src1->node.data_type->base_type) ++ switch (src1->node.data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +@@ -877,12 +877,12 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + { + unsigned int k; + +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +- switch (src1->node.data_type->base_type) ++ switch (src1->node.data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +@@ -916,12 +916,12 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con + { + unsigned int k; + +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +- switch (src1->node.data_type->base_type) ++ switch (src1->node.data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +@@ -955,14 +955,14 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + { + unsigned int k; + +- assert(dst_type->base_type == src1->node.data_type->base_type); +- assert(src2->node.data_type->base_type == HLSL_TYPE_INT); ++ assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); ++ assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); + + for (k = 0; k < dst_type->dimx; ++k) + { + 
unsigned int shift = src2->value.u[k].u % 32; + +- switch (src1->node.data_type->base_type) ++ switch (src1->node.data_type->e.numeric.type) + { + case HLSL_TYPE_INT: + dst->u[k].i = src1->value.u[k].i << shift; +@@ -983,11 +983,11 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->e.numeric.type); ++ assert(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -1021,11 +1021,11 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->e.numeric.type); ++ assert(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -1060,11 +1060,11 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- 
assert(type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->e.numeric.type); ++ assert(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -1102,11 +1102,11 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; ++ enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->e.numeric.type); ++ assert(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -1139,12 +1139,12 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + { + unsigned int k; + +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +- switch (src1->node.data_type->base_type) ++ switch (src1->node.data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +@@ -1175,32 +1175,13 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + { + unsigned int k; + +- assert(dst_type->base_type == src2->node.data_type->base_type); +- assert(dst_type->base_type == src3->node.data_type->base_type); ++ assert(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); ++ assert(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); ++ assert(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); for (k = 0; k < 
dst_type->dimx; ++k) - { @@ -9391,11 +14126,166 @@ index b76b1fce507..4cea98e9286 100644 return true; } +@@ -1209,14 +1190,14 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + { + unsigned int k; + +- assert(dst_type->base_type == src1->node.data_type->base_type); +- assert(src2->node.data_type->base_type == HLSL_TYPE_INT); ++ assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); ++ assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); + + for (k = 0; k < dst_type->dimx; ++k) + { + unsigned int shift = src2->value.u[k].u % 32; + +- switch (src1->node.data_type->base_type) ++ switch (src1->node.data_type->e.numeric.type) + { + case HLSL_TYPE_INT: + dst->u[k].i = src1->value.u[k].i >> shift; +@@ -1415,6 +1396,136 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + return success; + } + ++static bool constant_is_zero(struct hlsl_ir_constant *const_arg) ++{ ++ struct hlsl_type *data_type = const_arg->node.data_type; ++ unsigned int k; ++ ++ for (k = 0; k < data_type->dimx; ++k) ++ { ++ switch (data_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (const_arg->value.u[k].f != 0.0f) ++ return false; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ if (const_arg->value.u[k].d != 0.0) ++ return false; ++ break; ++ ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_BOOL: ++ if (const_arg->value.u[k].u != 0) ++ return false; ++ break; ++ ++ default: ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool constant_is_one(struct hlsl_ir_constant *const_arg) ++{ ++ struct hlsl_type *data_type = const_arg->node.data_type; ++ unsigned int k; ++ ++ for (k = 0; k < data_type->dimx; ++k) ++ { ++ switch (data_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (const_arg->value.u[k].f != 1.0f) ++ return false; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ if (const_arg->value.u[k].d != 1.0) ++ return false; ++ 
break; ++ ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_BOOL: ++ if (const_arg->value.u[k].u != 1) ++ return false; ++ break; ++ ++ default: ++ return false; ++ } ++ } ++ return true; ++} ++ ++bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_constant *const_arg = NULL; ++ struct hlsl_ir_node *mut_arg = NULL; ++ struct hlsl_ir_node *res_node; ++ struct hlsl_ir_expr *expr; ++ unsigned int i; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ ++ if (instr->data_type->class > HLSL_CLASS_VECTOR) ++ return false; ++ ++ /* Verify that the expression has two operands. */ ++ for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) ++ { ++ if (!!expr->operands[i].node != (i < 2)) ++ return false; ++ } ++ ++ if (expr->operands[0].node->type == HLSL_IR_CONSTANT) ++ { ++ const_arg = hlsl_ir_constant(expr->operands[0].node); ++ mut_arg = expr->operands[1].node; ++ } ++ else if (expr->operands[1].node->type == HLSL_IR_CONSTANT) ++ { ++ mut_arg = expr->operands[0].node; ++ const_arg = hlsl_ir_constant(expr->operands[1].node); ++ } ++ else ++ { ++ return false; ++ } ++ ++ res_node = NULL; ++ switch (expr->op) ++ { ++ case HLSL_OP2_ADD: ++ if (constant_is_zero(const_arg)) ++ res_node = mut_arg; ++ break; ++ ++ case HLSL_OP2_MUL: ++ if (constant_is_one(const_arg)) ++ res_node = mut_arg; ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (res_node) ++ { ++ hlsl_replace_node(&expr->node, res_node); ++ return true; ++ } ++ return false; ++} ++ + bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + struct hlsl_constant_value value; diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index f0bd85338c6..eca18f4eb28 100644 +index f0bd85338c6..9514ddb980f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -17,6 +17,7 @@ +@@ -17,9 +17,11 @@ */ #include 
"vkd3d_shader_private.h" @@ -9403,7 +14293,11 @@ index f0bd85338c6..eca18f4eb28 100644 bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) { -@@ -32,6 +33,9 @@ void vsir_program_cleanup(struct vsir_program *program) ++ memset(program, 0, sizeof(*program)); + program->shader_version = *version; + return shader_instruction_array_init(&program->instructions, reserve); + } +@@ -32,6 +34,9 @@ void vsir_program_cleanup(struct vsir_program *program) vkd3d_free((void *)program->block_names[i]); vkd3d_free(program->block_names); shader_instruction_array_destroy(&program->instructions); @@ -9413,7 +14307,7 @@ index f0bd85338c6..eca18f4eb28 100644 } static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) -@@ -53,19 +57,6 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i +@@ -53,19 +58,6 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i vsir_instruction_init(ins, &location, VKD3DSIH_NOP); } @@ -9433,7 +14327,7 @@ index f0bd85338c6..eca18f4eb28 100644 static bool vsir_instruction_init_with_params(struct vsir_program *program, struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) -@@ -91,86 +82,164 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, +@@ -91,86 +83,164 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, return true; } @@ -9658,7 +14552,7 @@ index f0bd85338c6..eca18f4eb28 100644 } return VKD3D_OK; -@@ -227,10 +296,11 @@ static const struct vkd3d_shader_varying_map *find_varying_map( +@@ -227,10 +297,11 @@ static const struct vkd3d_shader_varying_map *find_varying_map( return NULL; } @@ -9673,7 +14567,7 @@ index f0bd85338c6..eca18f4eb28 100644 const struct vkd3d_shader_varying_map_info *varying_map; unsigned int i; -@@ -252,7 
+322,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars +@@ -252,7 +323,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars * location with a different mask. */ if (input_mask && input_mask != e->mask) { @@ -9682,7 +14576,7 @@ index f0bd85338c6..eca18f4eb28 100644 "Aborting due to not yet implemented feature: " "Output mask %#x does not match input mask %#x.", e->mask, input_mask); -@@ -269,7 +339,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars +@@ -269,7 +340,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars { if (varying_map->varying_map[i].output_signature_index >= signature->element_count) { @@ -9691,7 +14585,7 @@ index f0bd85338c6..eca18f4eb28 100644 "Aborting due to not yet implemented feature: " "The next stage consumes varyings not written by this stage."); return VKD3D_ERROR_NOT_IMPLEMENTED; -@@ -453,7 +523,7 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader +@@ -453,7 +524,7 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) { @@ -9700,7 +14594,7 @@ index f0bd85338c6..eca18f4eb28 100644 param->reg.dimension = VSIR_DIMENSION_NONE; param->reg.idx[0].offset = label_id; } -@@ -464,12 +534,24 @@ static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned +@@ -464,12 +535,24 @@ static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned src->reg.idx[0].offset = idx; } @@ -9725,7 +14619,31 @@ index f0bd85338c6..eca18f4eb28 100644 static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -@@ -1383,10 +1465,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi +@@ -554,11 +637,14 @@ static bool 
control_point_normaliser_is_in_control_point_phase(const struct cont + return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; + } + +-static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( ++struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( + struct vkd3d_shader_instruction_array *instructions) + { + struct vkd3d_shader_src_param *rel_addr; + ++ if (instructions->outpointid_param) ++ return instructions->outpointid_param; ++ + if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) + return NULL; + +@@ -566,6 +652,7 @@ static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( + rel_addr->swizzle = 0; + rel_addr->modifiers = 0; + ++ instructions->outpointid_param = rel_addr; + return rel_addr; + } + +@@ -1383,10 +1470,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi } } @@ -9738,7 +14656,7 @@ index f0bd85338c6..eca18f4eb28 100644 struct vkd3d_shader_instruction *ins; bool has_control_point_phase; unsigned int i, j; -@@ -1394,9 +1475,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse +@@ -1394,9 +1480,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse normaliser.phase = VKD3DSIH_INVALID; normaliser.shader_type = program->shader_version.type; normaliser.major = program->shader_version.major; @@ -9751,7 +14669,7 @@ index f0bd85338c6..eca18f4eb28 100644 for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i) { -@@ -1439,9 +1520,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse +@@ -1439,9 +1525,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse } } @@ -9764,7 +14682,7 @@ index f0bd85338c6..eca18f4eb28 100644 { program->instructions = normaliser.instructions; return VKD3D_ERROR_OUT_OF_MEMORY; -@@ -1668,19 +1749,20 @@ static void remove_dead_code(struct vsir_program 
*program) +@@ -1668,19 +1754,20 @@ static void remove_dead_code(struct vsir_program *program) } } @@ -9789,7 +14707,7 @@ index f0bd85338c6..eca18f4eb28 100644 return VKD3D_ERROR_OUT_OF_MEMORY; memset(srcs, 0, sizeof(*srcs) * 3); -@@ -1723,7 +1805,7 @@ static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser +@@ -1723,7 +1810,7 @@ static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser case VKD3DSIH_TEXREG2AR: case VKD3DSIH_TEXREG2GB: case VKD3DSIH_TEXREG2RGB: @@ -9798,7 +14716,7 @@ index f0bd85338c6..eca18f4eb28 100644 "Aborting due to not yet implemented feature: " "Combined sampler instruction %#x.", ins->handler_idx); return VKD3D_ERROR_NOT_IMPLEMENTED; -@@ -1789,10 +1871,10 @@ struct cf_flattener_info +@@ -1789,10 +1876,10 @@ struct cf_flattener_info struct cf_flattener { @@ -9811,7 +14729,7 @@ index f0bd85338c6..eca18f4eb28 100644 struct vkd3d_shader_instruction *instructions; size_t instruction_capacity; -@@ -1812,13 +1894,20 @@ struct cf_flattener +@@ -1812,13 +1899,20 @@ struct cf_flattener size_t control_flow_info_size; }; @@ -9833,7 +14751,7 @@ index f0bd85338c6..eca18f4eb28 100644 return NULL; } return &flattener->instructions[flattener->instruction_count]; -@@ -1850,9 +1939,9 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ +@@ -1850,9 +1944,9 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ { struct vkd3d_shader_src_param *params; @@ -9845,7 +14763,7 @@ index f0bd85338c6..eca18f4eb28 100644 return NULL; } ins->src = params; -@@ -1866,10 +1955,10 @@ static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int +@@ -1866,10 +1960,10 @@ static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int if (!(ins = cf_flattener_require_space(flattener, 1))) return; @@ -9858,7 +14776,7 @@ index f0bd85338c6..eca18f4eb28 100644 } /* For conditional branches, this returns the false target branch parameter. 
*/ -@@ -1947,7 +2036,7 @@ static struct cf_flattener_info *cf_flattener_push_control_flow_level(struct cf_ +@@ -1947,7 +2041,7 @@ static struct cf_flattener_info *cf_flattener_push_control_flow_level(struct cf_ flattener->control_flow_depth + 1, sizeof(*flattener->control_flow_info))) { ERR("Failed to allocate control flow info structure.\n"); @@ -9867,7 +14785,7 @@ index f0bd85338c6..eca18f4eb28 100644 return NULL; } -@@ -2014,12 +2103,12 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla +@@ -2014,12 +2108,12 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla flattener->block_names[block_id] = buffer.buffer; } @@ -9883,7 +14801,7 @@ index f0bd85338c6..eca18f4eb28 100644 struct vkd3d_shader_instruction *dst_ins; size_t i; -@@ -2041,12 +2130,19 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte +@@ -2041,12 +2135,19 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte flattener->location = instruction->location; /* Declarations should occur before the first code block, which in hull shaders is marked by the first @@ -9908,7 +14826,7 @@ index f0bd85338c6..eca18f4eb28 100644 } cf_info = flattener->control_flow_depth -@@ -2064,7 +2160,8 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte +@@ -2064,7 +2165,8 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte break; case VKD3DSIH_LABEL: @@ -9918,7 +14836,7 @@ index f0bd85338c6..eca18f4eb28 100644 "Aborting due to not yet implemented feature: Label instruction."); return VKD3D_ERROR_NOT_IMPLEMENTED; -@@ -2229,8 +2326,10 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte +@@ -2229,8 +2331,10 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) { WARN("Unexpected src swizzle %#x.\n", src->swizzle); @@ -9930,7 
+14848,7 @@ index f0bd85338c6..eca18f4eb28 100644 } value = *src->reg.u.immconst_u32; -@@ -2358,21 +2457,18 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte +@@ -2358,21 +2462,18 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte ++flattener->instruction_count; } @@ -9958,7 +14876,7 @@ index f0bd85338c6..eca18f4eb28 100644 program->instructions.elements = flattener.instructions; program->instructions.capacity = flattener.instruction_capacity; program->instructions.count = flattener.instruction_count; -@@ -2548,97 +2644,6 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) +@@ -2548,97 +2649,6 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) } } @@ -10056,7 +14974,7 @@ index f0bd85338c6..eca18f4eb28 100644 vkd3d_free(program->instructions.elements); vkd3d_free(block_map); program->instructions.elements = instructions; -@@ -2656,145 +2661,139 @@ fail: +@@ -2656,145 +2666,139 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } @@ -10291,7 +15209,7 @@ index f0bd85338c6..eca18f4eb28 100644 switch (ins->handler_idx) { -@@ -2803,62 +2802,21 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p +@@ -2803,62 +2807,21 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p break; case VKD3DSIH_BRANCH: @@ -10363,7 +15281,7 @@ index f0bd85338c6..eca18f4eb28 100644 case VKD3DSIH_PHI: continue; -@@ -2867,162 +2825,55 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p +@@ -2867,162 +2830,55 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p break; } @@ -10556,7 +15474,7 @@ index f0bd85338c6..eca18f4eb28 100644 } static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) -@@ -3031,22 +2882,21 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc +@@ -3031,22 
+2887,21 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc for (i = 0; i < list->count; ++i) if (block == list->blocks[i]) @@ -10588,7 +15506,17 @@ index f0bd85338c6..eca18f4eb28 100644 /* `begin' points to the instruction immediately following the * LABEL that introduces the block. `end' points to the terminator * instruction (either BRANCH or RET). They can coincide, meaning -@@ -3089,12 +2939,209 @@ static void vsir_block_cleanup(struct vsir_block *block) +@@ -3063,8 +2918,7 @@ static enum vkd3d_result vsir_block_init(struct vsir_block *block, unsigned int + if (block_count > SIZE_MAX - (sizeof(*block->dominates) * CHAR_BIT - 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + +- block_count = align(block_count, sizeof(*block->dominates) * CHAR_BIT); +- byte_count = block_count / CHAR_BIT; ++ byte_count = VKD3D_BITMAP_SIZE(block_count) * sizeof(*block->dominates); + + assert(label); + memset(block, 0, sizeof(*block)); +@@ -3089,12 +2943,211 @@ static void vsir_block_cleanup(struct vsir_block *block) vkd3d_free(block->dominates); } @@ -10747,6 +15675,8 @@ index f0bd85338c6..eca18f4eb28 100644 { + struct vkd3d_shader_message_context *message_context; struct vsir_program *program; ++ size_t function_begin; ++ size_t function_end; struct vsir_block *blocks; struct vsir_block *entry; size_t block_count; @@ -10798,7 +15728,7 @@ index f0bd85338c6..eca18f4eb28 100644 }; static void vsir_cfg_cleanup(struct vsir_cfg *cfg) -@@ -3104,7 +3151,44 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) +@@ -3104,7 +3157,44 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) for (i = 0; i < cfg->block_count; ++i) vsir_block_cleanup(&cfg->blocks[i]); @@ -10843,20 +15773,11 @@ index f0bd85338c6..eca18f4eb28 100644 } static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_block *block, -@@ -3145,268 +3229,1838 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) - shape = "trapezium"; - break; +@@ -3153,260 +3243,1917 @@ static void 
vsir_cfg_dump_dot(struct vsir_cfg *cfg) + vkd3d_unreachable(); + } -- case VKD3DSIH_BRANCH: -- shape = vsir_register_is_label(&block->end->src[0].reg) ? "ellipse" : "box"; -+ case VKD3DSIH_BRANCH: -+ shape = vsir_register_is_label(&block->end->src[0].reg) ? "ellipse" : "box"; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ +- TRACE(" n%u [label=\"%u\", shape=\"%s\"];\n", block->label, block->label, shape); + TRACE(" n%u [label=\"%u\", shape=\"%s\"];\n", block->label, block->label, shape); + + for (j = 0; j < block->successors.count; ++j) @@ -10958,7 +15879,8 @@ index f0bd85338c6..eca18f4eb28 100644 +} + +static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) ++ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target, ++ size_t *pos) +{ + struct vsir_block *current_block = NULL; + enum vkd3d_result ret; @@ -10969,6 +15891,7 @@ index f0bd85338c6..eca18f4eb28 100644 + cfg->program = program; + cfg->block_count = program->block_count; + cfg->target = target; ++ cfg->function_begin = *pos; + + vsir_block_list_init(&cfg->order); + @@ -10978,9 +15901,10 @@ index f0bd85338c6..eca18f4eb28 100644 + if (TRACE_ON()) + vkd3d_string_buffer_init(&cfg->debug_buffer); + -+ for (i = 0; i < program->instructions.count; ++i) ++ for (i = *pos; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; ++ bool finish = false; + + switch (instruction->handler_idx) + { @@ -11012,11 +15936,24 @@ index f0bd85338c6..eca18f4eb28 100644 + current_block = NULL; + break; + ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ case VKD3DSIH_HS_FORK_PHASE: ++ case VKD3DSIH_HS_JOIN_PHASE: ++ assert(!current_block); ++ finish = true; ++ break; ++ + default: + break; + } ++ ++ if (finish) ++ break; + } + ++ *pos = i; ++ cfg->function_end = *pos; ++ + for (i = 0; i < 
cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; @@ -12473,118 +17410,152 @@ index f0bd85338c6..eca18f4eb28 100644 + if ((ret = vsir_cfg_structure_list_emit_jump(cfg, &structure->u.jump, + loop_idx)) < 0) + return ret; - break; - - default: - vkd3d_unreachable(); - } -- -- TRACE(" n%u [label=\"%u\", shape=\"%s\"];\n", block->label, block->label, shape); -- -- for (j = 0; j < block->successors.count; ++j) -- TRACE(" n%u -> n%u;\n", block->label, block->successors.blocks[j]->label); - } - -- TRACE("}\n"); ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ } ++ + return VKD3D_OK; - } - --static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program) ++} ++ +static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) +{ + return vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX); +} + +static enum vkd3d_result vsir_program_structurize_function(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) ++ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target, ++ size_t *pos) ++{ ++ enum vkd3d_result ret; ++ struct vsir_cfg cfg; ++ ++ if ((ret = vsir_cfg_init(&cfg, program, message_context, target, pos)) < 0) ++ return ret; ++ ++ vsir_cfg_compute_dominators(&cfg); ++ ++ if ((ret = vsir_cfg_compute_loops(&cfg)) < 0) ++ goto out; ++ ++ if ((ret = vsir_cfg_sort_nodes(&cfg)) < 0) ++ goto out; ++ ++ if ((ret = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) ++ goto out; ++ ++ if ((ret = vsir_cfg_build_structured_program(&cfg)) < 0) ++ goto out; ++ ++ if ((ret = vsir_cfg_optimize(&cfg)) < 0) ++ goto out; + +- for (j = 0; j < block->successors.count; ++j) +- TRACE(" n%u -> n%u;\n", block->label, block->successors.blocks[j]->label); +- } ++ ret = vsir_cfg_emit_structured_program(&cfg); + +- TRACE("}\n"); ++out: ++ vsir_cfg_cleanup(&cfg); ++ ++ return ret; + } + +-static enum 
vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program) ++static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) { - struct vsir_block *current_block = NULL; ++ struct vsir_cfg_emit_target target = {0}; enum vkd3d_result ret; -- size_t i; -+ struct vsir_cfg cfg; + size_t i; - memset(cfg, 0, sizeof(*cfg)); - cfg->program = program; - cfg->block_count = program->block_count; -+ if ((ret = vsir_cfg_init(&cfg, program, message_context, target)) < 0) -+ return ret; ++ target.jump_target_temp_idx = program->temp_count; ++ target.temp_count = program->temp_count + 1; - if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -+ vsir_cfg_compute_dominators(&cfg); ++ if (!reserve_instructions(&target.instructions, &target.ins_capacity, program->instructions.count)) + return VKD3D_ERROR_OUT_OF_MEMORY; - for (i = 0; i < program->instructions.count; ++i) -- { ++ for (i = 0; i < program->instructions.count;) + { - struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; -+ if ((ret = vsir_cfg_compute_loops(&cfg)) < 0) -+ goto out; ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - switch (instruction->handler_idx) -- { ++ switch (ins->handler_idx) + { - case VKD3DSIH_PHI: - case VKD3DSIH_SWITCH_MONOLITHIC: - vkd3d_unreachable(); -+ if ((ret = vsir_cfg_sort_nodes(&cfg)) < 0) -+ goto out; - -- case VKD3DSIH_LABEL: +- + case VKD3DSIH_LABEL: - { - unsigned int label = label_from_src_param(&instruction->src[0]); -+ if ((ret = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) -+ goto out; - +- - assert(!current_block); - assert(label > 0); - assert(label <= cfg->block_count); - current_block = &cfg->blocks[label - 1]; - assert(current_block->label == 0); - if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0) -- goto fail; ++ 
assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); ++ TRACE("Structurizing a non-hull shader.\n"); ++ if ((ret = vsir_program_structurize_function(program, message_context, ++ &target, &i)) < 0) + goto fail; - current_block->begin = &program->instructions.elements[i + 1]; - if (!cfg->entry) - cfg->entry = current_block; -- break; ++ assert(i == program->instructions.count); + break; - } -+ if ((ret = vsir_cfg_build_structured_program(&cfg)) < 0) -+ goto out; - case VKD3DSIH_BRANCH: - case VKD3DSIH_RET: - assert(current_block); - current_block->end = instruction; - current_block = NULL; -- break; -+ if ((ret = vsir_cfg_optimize(&cfg)) < 0) -+ goto out; ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ case VKD3DSIH_HS_FORK_PHASE: ++ case VKD3DSIH_HS_JOIN_PHASE: ++ assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); ++ TRACE("Structurizing phase %u of a hull shader.\n", ins->handler_idx); ++ target.instructions[target.ins_count++] = *ins; ++ ++i; ++ if ((ret = vsir_program_structurize_function(program, message_context, ++ &target, &i)) < 0) ++ goto fail; + break; -- default: -- break; -- } -- } -+ ret = vsir_cfg_emit_structured_program(&cfg); + default: ++ if (!reserve_instructions(&target.instructions, &target.ins_capacity, target.ins_count + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ target.instructions[target.ins_count++] = *ins; ++ ++i; + break; + } + } - for (i = 0; i < cfg->block_count; ++i) - { - struct vsir_block *block = &cfg->blocks[i]; -+out: -+ vsir_cfg_cleanup(&cfg); - +- - if (block->label == 0) - continue; -+ return ret; -+} - +- - switch (block->end->handler_idx) - { - case VKD3DSIH_RET: - break; -+static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ struct vsir_cfg_emit_target target = {0}; -+ enum vkd3d_result ret; -+ unsigned int i; - +- - case VKD3DSIH_BRANCH: - if (vsir_register_is_label(&block->end->src[0].reg)) - { @@ -12595,35 
+17566,19 @@ index f0bd85338c6..eca18f4eb28 100644 - { - if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[1])) < 0) - goto fail; -+ target.jump_target_temp_idx = program->temp_count; -+ target.temp_count = program->temp_count + 1; - +- - if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[2])) < 0) - goto fail; - } - break; -+ if (!reserve_instructions(&target.instructions, &target.ins_capacity, program->instructions.count)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - +- - default: - vkd3d_unreachable(); - } -+ /* Copy declarations until the first block. */ -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ -+ if (ins->handler_idx == VKD3DSIH_LABEL) -+ break; -+ -+ target.instructions[target.ins_count++] = *ins; - } - +- } +- - if (TRACE_ON()) - vsir_cfg_dump_dot(cfg); -+ if ((ret = vsir_program_structurize_function(program, message_context, &target)) < 0) -+ goto fail; -+ + vkd3d_free(program->instructions.elements); + program->instructions.elements = target.instructions; + program->instructions.capacity = target.ins_capacity; @@ -12708,9 +17663,9 @@ index f0bd85338c6..eca18f4eb28 100644 struct vsir_block *block = &cfg->blocks[i]; + struct vkd3d_shader_instruction *ins; -- if (block->label == 0) -- continue; -- + if (block->label == 0) + continue; + - vsir_cfg_compute_dominators_recurse(cfg->entry, block); - - if (TRACE_ON()) @@ -12738,47 +17693,70 @@ index f0bd85338c6..eca18f4eb28 100644 - if (TRACE_ON()) - vkd3d_string_buffer_cleanup(&buf); -} -- ++ for (i = 0; i < cfg->block_count; ++i) ++ { ++ struct vsir_block *block = &cfg->blocks[i]; ++ struct vkd3d_shader_instruction *ins; + -enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info) -{ - struct vkd3d_shader_instruction_array *instructions = &parser->program.instructions; - enum vkd3d_result result = VKD3D_OK; -- -- 
remove_dcl_temps(&parser->program); -- -- if ((result = instruction_array_lower_texkills(parser)) < 0) -- return result; -- -- if (parser->shader_desc.is_dxil) -+ for (i = 0; i < cfg->block_count; ++i) - { -- struct vsir_cfg cfg; -- -- if ((result = lower_switch_to_if_ladder(&parser->program)) < 0) -- return result; -- -- if ((result = materialize_ssas_to_temps(parser)) < 0) -- return result; -- -- if ((result = vsir_cfg_init(&cfg, &parser->program)) < 0) -- return result; -- -- vsir_cfg_compute_dominators(&cfg); -+ struct vsir_block *block = &cfg->blocks[i]; -+ struct vkd3d_shader_instruction *ins; ++ if (block->label == 0) ++ continue; -- if ((result = simple_structurizer_run(parser)) < 0) +- remove_dcl_temps(&parser->program); + for (ins = block->begin; ins <= block->end; ++ins) - { -- vsir_cfg_cleanup(&cfg); -- return result; ++ { + for (j = 0; j < ins->src_count; ++j) + register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks); - } -- ++ } ++ } + +- if ((result = instruction_array_lower_texkills(parser)) < 0) +- return result; ++ if (alloc.next_temp_idx == program->temp_count) ++ goto done; + +- if (parser->shader_desc.is_dxil) +- { +- struct vsir_cfg cfg; ++ TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count); + +- if ((result = lower_switch_to_if_ladder(&parser->program)) < 0) +- return result; ++ for (i = cfg->function_begin; i < cfg->function_end; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- if ((result = materialize_ssas_to_temps(parser)) < 0) +- return result; ++ for (j = 0; j < ins->dst_count; ++j) ++ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); + +- if ((result = vsir_cfg_init(&cfg, &parser->program)) < 0) +- return result; ++ for (j = 0; j < ins->src_count; ++j) ++ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); ++ } + +- vsir_cfg_compute_dominators(&cfg); ++ 
program->temp_count = alloc.next_temp_idx; ++done: ++ vkd3d_free(origin_blocks); ++ vkd3d_free(alloc.table); + +- if ((result = simple_structurizer_run(parser)) < 0) +- { +- vsir_cfg_cleanup(&cfg); +- return result; +- } ++ return VKD3D_OK; ++} + - vsir_cfg_cleanup(&cfg); - } +- } - else - { - if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_PIXEL) @@ -12786,72 +17764,89 @@ index f0bd85338c6..eca18f4eb28 100644 - if ((result = remap_output_signature(parser, compile_info)) < 0) - return result; - } -- ++static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_function( ++ struct vsir_program *program, struct vkd3d_shader_message_context *message_context, ++ size_t *pos) ++{ ++ enum vkd3d_result ret; ++ struct vsir_cfg cfg; + - if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_HULL) - { - if ((result = instruction_array_flatten_hull_shader_phases(instructions)) < 0) - return result; ++ if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL, pos)) < 0) ++ return ret; - if ((result = instruction_array_normalise_hull_shader_control_point_io(instructions, - &parser->shader_desc.input_signature)) < 0) - return result; - } -+ if (alloc.next_temp_idx == program->temp_count) -+ goto done; ++ vsir_cfg_compute_dominators(&cfg); - if ((result = shader_normalise_io_registers(parser)) < 0) - return result; -+ TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count); ++ ret = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg); - if ((result = instruction_array_normalise_flat_constants(&parser->program)) < 0) - return result; -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ vsir_cfg_cleanup(&cfg); - remove_dead_code(&parser->program); -+ for (j = 0; j < ins->dst_count; ++j) -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg); ++ return ret; ++} - if ((result = 
normalise_combined_samplers(parser)) < 0) - return result; -+ for (j = 0; j < ins->src_count; ++j) -+ materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); - } - -- if ((result = flatten_control_flow_constructs(parser)) < 0) -- return result; -+ program->temp_count = alloc.next_temp_idx; -+done: -+ vkd3d_free(origin_blocks); -+ vkd3d_free(alloc.table); - -- if (TRACE_ON()) -- vkd3d_shader_trace(&parser->program); -+ return VKD3D_OK; -+} - -- if (!parser->failed && (result = vsir_validate(parser)) < 0) -- return result; +- } +static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) +{ + enum vkd3d_result ret; -+ struct vsir_cfg cfg; -+ -+ if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL)) < 0) -+ return ret; ++ size_t i; + +- if ((result = flatten_control_flow_constructs(parser)) < 0) +- return result; ++ for (i = 0; i < program->instructions.count;) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- if (TRACE_ON()) +- vkd3d_shader_trace(&parser->program); ++ switch (ins->handler_idx) ++ { ++ case VKD3DSIH_LABEL: ++ assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); ++ TRACE("Materializing undominated SSAs in a non-hull shader.\n"); ++ if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( ++ program, message_context, &i)) < 0) ++ return ret; ++ assert(i == program->instructions.count); ++ break; + +- if (!parser->failed && (result = vsir_validate(parser)) < 0) +- return result; ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ case VKD3DSIH_HS_FORK_PHASE: ++ case VKD3DSIH_HS_JOIN_PHASE: ++ assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); ++ TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->handler_idx); ++ ++i; ++ if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( ++ program, message_context, &i)) < 0) 
++ return ret; ++ break; - if (parser->failed) - result = VKD3D_ERROR_INVALID_SHADER; -+ vsir_cfg_compute_dominators(&cfg); ++ default: ++ ++i; ++ break; ++ } ++ } - return result; -+ ret = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg); -+ -+ vsir_cfg_cleanup(&cfg); -+ -+ return ret; ++ return VKD3D_OK; } struct validation_context @@ -12866,7 +17861,7 @@ index f0bd85338c6..eca18f4eb28 100644 bool dcl_temps_found; enum vkd3d_shader_opcode phase; enum cf_type -@@ -3452,16 +5106,21 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c +@@ -3452,16 +5199,21 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c if (ctx->invalid_instruction_idx) { @@ -12890,7 +17885,7 @@ index f0bd85338c6..eca18f4eb28 100644 } static void vsir_validate_src_param(struct validation_context *ctx, -@@ -3515,10 +5174,10 @@ static void vsir_validate_register(struct validation_context *ctx, +@@ -3515,10 +5267,10 @@ static void vsir_validate_register(struct validation_context *ctx, if (reg->idx[0].rel_addr) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register."); @@ -12903,7 +17898,7 @@ index f0bd85338c6..eca18f4eb28 100644 break; } -@@ -3606,7 +5265,7 @@ static void vsir_validate_register(struct validation_context *ctx, +@@ -3606,7 +5358,7 @@ static void vsir_validate_register(struct validation_context *ctx, validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.", reg->precision); @@ -12912,7 +17907,7 @@ index f0bd85338c6..eca18f4eb28 100644 validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.", reg->data_type); -@@ -3708,7 +5367,7 @@ static void vsir_validate_dst_param(struct validation_context *ctx, +@@ -3708,7 +5460,7 @@ static void vsir_validate_dst_param(struct validation_context *ctx, switch (dst->reg.type) { case VKD3DSPR_SSA: @@ -12921,7 +17916,7 @@ index 
f0bd85338c6..eca18f4eb28 100644 { struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; -@@ -3761,7 +5420,7 @@ static void vsir_validate_src_param(struct validation_context *ctx, +@@ -3761,7 +5513,7 @@ static void vsir_validate_src_param(struct validation_context *ctx, switch (src->reg.type) { case VKD3DSPR_SSA: @@ -12930,7 +17925,7 @@ index f0bd85338c6..eca18f4eb28 100644 { struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; unsigned int i; -@@ -3852,7 +5511,6 @@ static void vsir_validate_instruction(struct validation_context *ctx) +@@ -3852,7 +5604,6 @@ static void vsir_validate_instruction(struct validation_context *ctx) size_t i; instruction = &ctx->program->instructions.elements[ctx->instruction_idx]; @@ -12938,7 +17933,7 @@ index f0bd85338c6..eca18f4eb28 100644 for (i = 0; i < instruction->dst_count; ++i) vsir_validate_dst_param(ctx, &instruction->dst[i]); -@@ -3884,6 +5542,46 @@ static void vsir_validate_instruction(struct validation_context *ctx) +@@ -3884,11 +5635,74 @@ static void vsir_validate_instruction(struct validation_context *ctx) ctx->dcl_temps_found = false; return; @@ -12950,9 +17945,30 @@ index f0bd85338c6..eca18f4eb28 100644 + instruction->declaration.max_tessellation_factor); + return; + -+ /* The DXIL parser can generate these outside phases, but this is not an issue. 
*/ -+ case VKD3DSIH_DCL_INPUT: -+ case VKD3DSIH_DCL_OUTPUT: ++ case VKD3DSIH_DCL_INPUT_PRIMITIVE: ++ if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED ++ || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", ++ instruction->declaration.primitive_type.type); ++ return; ++ ++ case VKD3DSIH_DCL_VERTICES_OUT: ++ if (instruction->declaration.count > 1024) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", ++ instruction->declaration.count); ++ return; ++ ++ case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: ++ if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED ++ || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", ++ instruction->declaration.primitive_type.type); ++ return; ++ ++ case VKD3DSIH_DCL_GS_INSTANCES: ++ if (!instruction->declaration.count || instruction->declaration.count > 32) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", ++ instruction->declaration.count); + return; + + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: @@ -12985,7 +18001,25 @@ index f0bd85338c6..eca18f4eb28 100644 default: break; } -@@ -4203,17 +5901,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) + +- if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID) ++ /* Only DCL instructions may occur outside hull shader phases. 
*/ ++ if (!vsir_instruction_is_dcl(instruction) && version->type == VKD3D_SHADER_TYPE_HULL ++ && ctx->phase == VKD3DSIH_INVALID) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "Instruction %#x appear before any phase instruction in a hull shader.", + instruction->handler_idx); +@@ -4180,7 +5994,8 @@ static void vsir_validate_instruction(struct validation_context *ctx) + unsigned int value_idx = 2 * i; + unsigned int label_idx = 2 * i + 1; + +- if (!register_is_constant(&instruction->src[value_idx].reg) && !register_is_ssa(&instruction->src[value_idx].reg)) ++ if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) ++ && !register_is_ssa(&instruction->src[value_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid value register for incoming %zu of type %#x in PHI instruction, " + "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); +@@ -4203,17 +6018,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) } } @@ -13010,7 +18044,7 @@ index f0bd85338c6..eca18f4eb28 100644 return VKD3D_OK; if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) -@@ -4222,7 +5923,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) +@@ -4222,7 +6040,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) goto fail; @@ -13019,7 +18053,7 @@ index f0bd85338c6..eca18f4eb28 100644 vsir_validate_instruction(&ctx); ctx.invalid_instruction_idx = true; -@@ -4247,7 +5948,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) +@@ -4247,7 +6065,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) vkd3d_free(ctx.temps); vkd3d_free(ctx.ssas); @@ -13028,7 +18062,7 @@ index f0bd85338c6..eca18f4eb28 100644 fail: vkd3d_free(ctx.blocks); -@@ -4256,3 +5957,72 @@ fail: +@@ -4256,3 +6074,72 @@ fail: return 
VKD3D_ERROR_OUT_OF_MEMORY; } @@ -13102,7 +18136,7 @@ index f0bd85338c6..eca18f4eb28 100644 + return result; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 5c87ff15503..c4e712b8471 100644 +index 5c87ff15503..4ee8e6bba4c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -45,6 +45,8 @@ static spv_target_env spv_target_env_from_vkd3d(enum vkd3d_shader_spirv_environm @@ -13124,7 +18158,15 @@ index 5c87ff15503..c4e712b8471 100644 #define VKD3D_SPIRV_GENERATOR_ID 18 #define VKD3D_SPIRV_GENERATOR_VERSION 11 #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) -@@ -1524,6 +1527,19 @@ static uint32_t vkd3d_spirv_build_op_logical_equal(struct vkd3d_spirv_builder *b +@@ -358,6 +361,7 @@ struct vkd3d_spirv_builder + uint32_t type_sampler_id; + uint32_t type_bool_id; + uint32_t type_void_id; ++ uint32_t scope_subgroup_id; + + struct vkd3d_spirv_stream debug_stream; /* debug instructions */ + struct vkd3d_spirv_stream annotation_stream; /* decoration instructions */ +@@ -1524,6 +1528,19 @@ static uint32_t vkd3d_spirv_build_op_logical_equal(struct vkd3d_spirv_builder *b SpvOpLogicalEqual, result_type, operand0, operand1); } @@ -13144,7 +18186,71 @@ index 5c87ff15503..c4e712b8471 100644 static uint32_t vkd3d_spirv_build_op_convert_utof(struct vkd3d_spirv_builder *builder, uint32_t result_type, uint32_t unsigned_value) { -@@ -1825,6 +1841,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder +@@ -1725,6 +1742,63 @@ static void vkd3d_spirv_build_op_memory_barrier(struct vkd3d_spirv_builder *buil + SpvOpMemoryBarrier, memory_id, memory_semantics_id); + } + ++static uint32_t vkd3d_spirv_build_op_scope_subgroup(struct vkd3d_spirv_builder *builder) ++{ ++ return vkd3d_spirv_get_op_constant(builder, vkd3d_spirv_get_op_type_int(builder, 32, 0), SpvScopeSubgroup); ++} ++ ++static uint32_t 
vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *builder) ++{ ++ return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t val_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); ++ return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpGroupNonUniformBallot, ++ result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot_bit_count(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, SpvGroupOperation group_op, uint32_t val_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); ++ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformBallotBitCount, ++ result_type, vkd3d_spirv_get_op_scope_subgroup(builder), group_op, val_id); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_elect(struct vkd3d_spirv_builder *builder) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniform); ++ return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpGroupNonUniformElect, ++ vkd3d_spirv_get_op_type_bool(builder), vkd3d_spirv_get_op_scope_subgroup(builder)); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_broadcast(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t val_id, uint32_t lane_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); ++ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformBroadcast, result_type, ++ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, lane_id); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_shuffle(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t val_id, uint32_t 
lane_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformShuffle); ++ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformShuffle, result_type, ++ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, lane_id); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_broadcast_first(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t val_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); ++ return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpGroupNonUniformBroadcastFirst, ++ result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); ++} ++ + static uint32_t vkd3d_spirv_build_op_glsl_std450_tr1(struct vkd3d_spirv_builder *builder, + enum GLSLstd450 op, uint32_t result_type, uint32_t operand) + { +@@ -1825,6 +1899,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder { switch (data_type) { @@ -13152,7 +18258,7 @@ index 5c87ff15503..c4e712b8471 100644 case VKD3D_DATA_FLOAT: case VKD3D_DATA_SNORM: case VKD3D_DATA_UNORM: -@@ -1832,6 +1849,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder +@@ -1832,6 +1907,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder break; case VKD3D_DATA_INT: case VKD3D_DATA_UINT: @@ -13160,7 +18266,7 @@ index 5c87ff15503..c4e712b8471 100644 return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); break; case VKD3D_DATA_DOUBLE: -@@ -1900,7 +1918,7 @@ static void vkd3d_spirv_builder_free(struct vkd3d_spirv_builder *builder) +@@ -1900,7 +1976,7 @@ static void vkd3d_spirv_builder_free(struct vkd3d_spirv_builder *builder) } static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, @@ -13169,7 +18275,7 @@ index 5c87ff15503..c4e712b8471 100644 { uint64_t capability_mask = builder->capability_mask; struct vkd3d_spirv_stream stream; -@@ -1911,7 +1929,8 @@ static bool 
vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, +@@ -1911,7 +1987,8 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, vkd3d_spirv_stream_init(&stream); vkd3d_spirv_build_word(&stream, SpvMagicNumber); @@ -13179,7 +18285,7 @@ index 5c87ff15503..c4e712b8471 100644 vkd3d_spirv_build_word(&stream, VKD3D_SPIRV_GENERATOR_MAGIC); vkd3d_spirv_build_word(&stream, builder->current_id); /* bound */ vkd3d_spirv_build_word(&stream, 0); /* schema, reserved */ -@@ -1940,6 +1959,9 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, +@@ -1940,6 +2017,9 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStorageImageArrayDynamicIndexing) || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderNonUniformEXT)) vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_descriptor_indexing"); @@ -13189,7 +18295,7 @@ index 5c87ff15503..c4e712b8471 100644 if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStencilExportEXT)) vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_shader_stencil_export"); if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderViewportIndexLayerEXT)) -@@ -2346,6 +2368,7 @@ struct spirv_compiler +@@ -2346,6 +2426,7 @@ struct spirv_compiler unsigned int output_control_point_count; bool use_vocp; @@ -13197,7 +18303,7 @@ index 5c87ff15503..c4e712b8471 100644 bool emit_point_size; enum vkd3d_shader_opcode phase; -@@ -2427,14 +2450,14 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) +@@ -2427,14 +2508,13 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) vkd3d_free(compiler); } @@ -13206,8 +18312,9 @@ index 5c87ff15503..c4e712b8471 100644 +static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - 
struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location, - uint64_t config_flags) +- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location, +- uint64_t config_flags) ++ struct vkd3d_shader_message_context *message_context, uint64_t config_flags) { - const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; - const struct shader_signature *output_signature = &shader_desc->output_signature; @@ -13216,7 +18323,16 @@ index 5c87ff15503..c4e712b8471 100644 const struct vkd3d_shader_interface_info *shader_interface; const struct vkd3d_shader_descriptor_offset_info *offset_info; const struct vkd3d_shader_spirv_target_info *target_info; -@@ -2456,6 +2479,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve +@@ -2447,7 +2527,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve + + memset(compiler, 0, sizeof(*compiler)); + compiler->message_context = message_context; +- compiler->location = *location; ++ compiler->location.source_name = compile_info->source_name; + compiler->config_flags = config_flags; + + if ((target_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO))) +@@ -2456,6 +2536,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve { case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: @@ -13224,7 +18340,7 @@ index 5c87ff15503..c4e712b8471 100644 break; default: WARN("Invalid target environment %#x.\n", target_info->environment); -@@ -2545,7 +2569,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve +@@ -2545,7 +2626,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve rb_init(&compiler->symbol_table, vkd3d_symbol_compare); @@ -13233,7 +18349,32 @@ index 5c87ff15503..c4e712b8471 100644 if ((shader_interface = 
vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) { -@@ -3372,7 +3396,7 @@ struct vkd3d_shader_register_info +@@ -2608,6 +2689,11 @@ static bool spirv_compiler_is_opengl_target(const struct spirv_compiler *compile + return spirv_compiler_get_target_environment(compiler) == VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5; + } + ++static bool spirv_compiler_is_spirv_min_1_3_target(const struct spirv_compiler *compiler) ++{ ++ return spirv_compiler_get_target_environment(compiler) == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1; ++} ++ + static bool spirv_compiler_is_target_extension_supported(const struct spirv_compiler *compiler, + enum vkd3d_shader_spirv_extension extension) + { +@@ -3126,6 +3212,12 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s + case VKD3DSPR_OUTSTENCILREF: + snprintf(buffer, buffer_size, "oStencilRef"); + break; ++ case VKD3DSPR_WAVELANECOUNT: ++ snprintf(buffer, buffer_size, "vWaveLaneCount"); ++ break; ++ case VKD3DSPR_WAVELANEINDEX: ++ snprintf(buffer, buffer_size, "vWaveLaneIndex"); ++ break; + default: + FIXME("Unhandled register %#x.\n", reg->type); + snprintf(buffer, buffer_size, "unrecognized_%#x", reg->type); +@@ -3372,7 +3464,7 @@ struct vkd3d_shader_register_info bool is_aggregate; }; @@ -13242,7 +18383,7 @@ index 5c87ff15503..c4e712b8471 100644 const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_info *register_info) { struct vkd3d_symbol reg_symbol, *symbol; -@@ -3398,7 +3422,8 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil +@@ -3398,7 +3490,8 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil vkd3d_symbol_make_register(&reg_symbol, reg); if (!(entry = rb_get(&compiler->symbol_table, &reg_symbol))) { @@ -13252,7 +18393,18 @@ index 5c87ff15503..c4e712b8471 100644 memset(register_info, 0, sizeof(*register_info)); return false; } -@@ -3736,6 +3761,70 @@ static uint32_t spirv_compiler_emit_bool_to_double(struct
spirv_compiler *compil +@@ -3548,8 +3641,9 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp + indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); + } + ++ /* Alignment is supported only in the Kernel execution model and is an optimisation only. */ + if (reg->alignment) +- WARN("Ignoring alignment %u.\n", reg->alignment); ++ TRACE("Ignoring alignment %u.\n", reg->alignment); + + if (index_count) + { +@@ -3736,6 +3830,70 @@ static uint32_t spirv_compiler_emit_bool_to_double(struct spirv_compiler *compil return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); } @@ -13323,7 +18475,7 @@ index 5c87ff15503..c4e712b8471 100644 static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) { -@@ -3748,14 +3837,15 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile +@@ -3748,14 +3906,15 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile if (reg->dimension == VSIR_DIMENSION_SCALAR) { for (i = 0; i < component_count; ++i) @@ -13341,7 +18493,7 @@ index 5c87ff15503..c4e712b8471 100644 } } -@@ -3899,6 +3989,13 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil +@@ -3899,6 +4058,13 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil switch (icb->data_type) { @@ -13355,7 +18507,7 @@ index 5c87ff15503..c4e712b8471 100644 case VKD3D_DATA_FLOAT: case VKD3D_DATA_INT: case VKD3D_DATA_UINT: -@@ -3998,7 +4095,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, +@@ -3998,7 +4164,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, struct vkd3d_shader_register_info reg_info; unsigned int component_count; uint32_t type_id, val_id; @@ -13364,7 +18516,7 @@ index 5c87ff15503..c4e712b8471 100644 if (reg->type ==
VKD3DSPR_IMMCONST) return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); -@@ -4018,17 +4115,17 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, +@@ -4018,17 +4184,17 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); return vkd3d_spirv_get_op_undef(builder, type_id); } @@ -13385,7 +18537,7 @@ index 5c87ff15503..c4e712b8471 100644 { return spirv_compiler_emit_load_scalar(compiler, reg, swizzle, write_mask, &reg_info); } -@@ -4041,7 +4138,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, +@@ -4041,7 +4207,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; val_id = spirv_compiler_emit_swizzle(compiler, @@ -13394,7 +18546,7 @@ index 5c87ff15503..c4e712b8471 100644 if (component_type != reg_info.component_type) { -@@ -4087,7 +4184,7 @@ static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, +@@ -4087,7 +4253,7 @@ static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, uint32_t type_id; type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); @@ -13403,7 +18555,7 @@ index 5c87ff15503..c4e712b8471 100644 return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id); FIXME("Unhandled data type %#x.\n", reg->data_type); -@@ -4101,7 +4198,7 @@ static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, +@@ -4101,7 +4267,7 @@ static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, uint32_t type_id; type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); @@ -13412,7 +18564,7 @@ index 5c87ff15503..c4e712b8471 100644 return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); else if (data_type_is_integer(reg->data_type)) return
vkd3d_spirv_build_op_snegate(builder, type_id, val_id); -@@ -4285,7 +4382,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, +@@ -4285,7 +4451,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, } type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); @@ -13421,7 +18573,7 @@ index 5c87ff15503..c4e712b8471 100644 return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id); FIXME("Unhandled data type %#x.\n", reg->data_type); -@@ -4322,11 +4419,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp +@@ -4322,11 +4488,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp { unsigned int component_count = vsir_write_mask_component_count(dst->write_mask); struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -13435,7 +18587,7 @@ index 5c87ff15503..c4e712b8471 100644 val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, component_ids, component_count); } -@@ -4334,6 +4431,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp +@@ -4334,6 +4500,11 @@ static void spirv_compiler_emit_store_dst_components(struct spirv_compiler *comp { val_id = *component_ids; } @@ -13447,7 +18599,67 @@ index 5c87ff15503..c4e712b8471 100644 spirv_compiler_emit_store_dst(compiler, dst, val_id); } -@@ -6272,9 +6374,24 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp +@@ -4433,6 +4604,10 @@ static void spirv_compiler_decorate_builtin(struct spirv_compiler *compiler, + case SpvBuiltInCullDistance: + vkd3d_spirv_enable_capability(builder, SpvCapabilityCullDistance); + break; ++ case SpvBuiltInSubgroupSize: ++ case SpvBuiltInSubgroupLocalInvocationId: ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniform); ++ break; + default: + break; + } +@@ -4622,6 +4797,9 @@ vkd3d_register_builtins[] = + {VKD3DSPR_DEPTHOUTLE, 
{VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + + {VKD3DSPR_OUTSTENCILREF, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, ++ ++ {VKD3DSPR_WAVELANECOUNT, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, ++ {VKD3DSPR_WAVELANEINDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, + }; + + static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *compiler, +@@ -5670,9 +5848,26 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler + flags &= ~VKD3DSGF_ENABLE_INT64; + } + ++ if (flags & VKD3DSGF_ENABLE_WAVE_INTRINSICS) ++ { ++ if (!(compiler->features & VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS)) ++ { ++ WARN("Unsupported wave ops.\n"); ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, ++ "The target environment does not support wave ops."); ++ } ++ else if (!spirv_compiler_is_spirv_min_1_3_target(compiler)) ++ { ++ WARN("Wave ops enabled but environment does not support SPIR-V 1.3 or greater.\n"); ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, ++ "The target environment uses wave ops but does not support SPIR-V 1.3 or greater."); ++ } ++ flags &= ~VKD3DSGF_ENABLE_WAVE_INTRINSICS; ++ } ++ + if (flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS)) + FIXME("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); +- else ++ else if (flags) + WARN("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); + } + +@@ -5734,8 +5929,9 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil + vsir_register_init(&reg, VKD3DSPR_IDXTEMP, VKD3D_DATA_FLOAT, 1); + reg.idx[0].offset = temp->register_idx; + ++ /* Alignment is supported only in the Kernel execution model and is an optimisation only.
*/ + if (temp->alignment) +- WARN("Ignoring alignment %u.\n", temp->alignment); ++ TRACE("Ignoring alignment %u.\n", temp->alignment); + + function_location = spirv_compiler_get_current_function_location(compiler); + vkd3d_spirv_begin_function_stream_insertion(builder, function_location); +@@ -6272,9 +6468,24 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); @@ -13473,7 +18685,7 @@ index 5c87ff15503..c4e712b8471 100644 if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) { assert(structure_stride); /* counters are valid only for structured buffers */ -@@ -6324,20 +6441,26 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp +@@ -6324,20 +6535,26 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp } static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, @@ -13503,7 +18715,7 @@ index 5c87ff15503..c4e712b8471 100644 spirv_compiler_emit_register_debug_name(builder, var_id, reg); -@@ -6352,8 +6475,8 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, +@@ -6352,8 +6569,8 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; @@ -13514,7 +18726,7 @@ index 5c87ff15503..c4e712b8471 100644 } static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compiler, -@@ -6361,8 +6484,8 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi +@@ -6361,8 +6578,8 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi { const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; unsigned int stride = 
tgsm_structured->byte_stride / 4; @@ -13525,7 +18737,7 @@ index 5c87ff15503..c4e712b8471 100644 } static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, -@@ -6871,7 +6994,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, +@@ -6871,7 +7088,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); @@ -13534,7 +18746,7 @@ index 5c87ff15503..c4e712b8471 100644 { val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); } -@@ -6880,7 +7003,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, +@@ -6880,7 +7097,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); } @@ -13543,7 +18755,7 @@ index 5c87ff15503..c4e712b8471 100644 { val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); } -@@ -6909,6 +7032,15 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil +@@ -6909,6 +7126,15 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil SpvOp op = SpvOpMax; unsigned int i; @@ -13559,7 +18771,7 @@ index 5c87ff15503..c4e712b8471 100644 if (src->reg.data_type == VKD3D_DATA_BOOL) { if (dst->reg.data_type == VKD3D_DATA_BOOL) -@@ -6997,6 +7129,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( +@@ -6997,6 +7223,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( } glsl_insts[] = { @@ -13567,7 +18779,7 @@ index 5c87ff15503..c4e712b8471 100644 {VKD3DSIH_ACOS, GLSLstd450Acos}, {VKD3DSIH_ASIN, GLSLstd450Asin}, 
{VKD3DSIH_ATAN, GLSLstd450Atan}, -@@ -7049,6 +7182,16 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp +@@ -7049,6 +7276,16 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp unsigned int i, component_count; enum GLSLstd450 glsl_inst; @@ -13584,7 +18796,7 @@ index 5c87ff15503..c4e712b8471 100644 glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); if (glsl_inst == GLSLstd450Bad) { -@@ -7093,8 +7236,8 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, +@@ -7093,8 +7330,8 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, struct vkd3d_shader_register_info dst_reg_info, src_reg_info; const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; @@ -13594,7 +18806,7 @@ index 5c87ff15503..c4e712b8471 100644 if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA || dst->modifiers || src->modifiers) -@@ -7145,7 +7288,13 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, +@@ -7145,7 +7382,13 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, } general_implementation: @@ -13609,7 +18821,7 @@ index 5c87ff15503..c4e712b8471 100644 if (dst->reg.data_type != src->reg.data_type) { val_id = vkd3d_spirv_build_op_bitcast(builder, vkd3d_spirv_get_type_id_for_data_type(builder, -@@ -7171,8 +7320,15 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, +@@ -7171,8 +7414,15 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); if (src[0].reg.data_type != VKD3D_DATA_BOOL) @@ -13627,7 +18839,7 @@ index 5c87ff15503..c4e712b8471 100644 val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, src1_id, src2_id); spirv_compiler_emit_store_dst(compiler, dst, val_id); -@@ -7335,7 +7491,7 @@ static void 
spirv_compiler_emit_imad(struct spirv_compiler *compiler, +@@ -7335,7 +7585,7 @@ static void spirv_compiler_emit_imad(struct spirv_compiler *compiler, unsigned int i, component_count; component_count = vsir_write_mask_component_count(dst->write_mask); @@ -13636,7 +18848,7 @@ index 5c87ff15503..c4e712b8471 100644 for (i = 0; i < ARRAY_SIZE(src_ids); ++i) src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], dst->write_mask); -@@ -7684,6 +7840,56 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co +@@ -7684,6 +7934,56 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); } @@ -13693,7 +18905,7 @@ index 5c87ff15503..c4e712b8471 100644 static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction, uint32_t target_block_id) { -@@ -7702,11 +7908,31 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co +@@ -7702,11 +8002,31 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co return merge_block_id; } @@ -13725,7 +18937,7 @@ index 5c87ff15503..c4e712b8471 100644 if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (is_in_default_phase(compiler) || is_in_control_point_phase(compiler))) spirv_compiler_emit_shader_epilogue_invocation(compiler); -@@ -7790,8 +8016,9 @@ static void spirv_compiler_emit_discard(struct spirv_compiler *compiler, +@@ -7790,8 +8110,9 @@ static void spirv_compiler_emit_discard(struct spirv_compiler *compiler, * a mismatch between the VSIR structure and the SPIR-V one, which would cause problems if * structurisation is necessary. Therefore we emit it as a function call. 
*/ condition_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); @@ -13737,7 +18949,7 @@ index 5c87ff15503..c4e712b8471 100644 void_id = vkd3d_spirv_get_op_type_void(builder); vkd3d_spirv_build_op_function_call(builder, void_id, spirv_compiler_get_discard_function_id(compiler), &condition_id, 1); -@@ -8570,7 +8797,6 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, +@@ -8570,7 +8891,6 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, reg_info.id, coordinate_id); constituents[j++] = vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); } @@ -13745,7 +18957,7 @@ index 5c87ff15503..c4e712b8471 100644 spirv_compiler_emit_store_dst_components(compiler, dst, VKD3D_SHADER_COMPONENT_UINT, constituents); } -@@ -8678,8 +8904,8 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, +@@ -8678,8 +8998,8 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; uint32_t base_coordinate_id, component_idx; @@ -13755,7 +18967,7 @@ index 5c87ff15503..c4e712b8471 100644 unsigned int component_count; if (!spirv_compiler_get_register_info(compiler, &dst->reg, &reg_info)) -@@ -8691,9 +8917,9 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, +@@ -8691,9 +9011,9 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); @@ -13768,7 +18980,7 @@ index 5c87ff15503..c4e712b8471 100644 component_count = vsir_write_mask_component_count(dst->write_mask); for (component_idx = 0; component_idx < component_count; ++component_idx) -@@ -8944,6 +9170,7 @@ static
void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil +@@ -8944,6 +9264,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil const struct vkd3d_shader_dst_param *resource; uint32_t coordinate_id, sample_id, pointer_id; struct vkd3d_shader_register_info reg_info; @@ -13776,7 +18988,7 @@ index 5c87ff15503..c4e712b8471 100644 struct vkd3d_shader_image image; unsigned int structure_stride; uint32_t coordinate_mask; -@@ -9035,12 +9262,23 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil +@@ -9035,12 +9356,23 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil val_id = spirv_compiler_emit_load_src_with_type(compiler, &src[1], VKD3DSP_WRITEMASK_0, component_type); @@ -13802,7 +19014,7 @@ index 5c87ff15503..c4e712b8471 100644 operands[i++] = spirv_compiler_emit_load_src_with_type(compiler, &src[2], VKD3DSP_WRITEMASK_0, component_type); } operands[i++] = val_id; -@@ -9110,6 +9348,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, +@@ -9110,6 +9442,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; uint32_t type_id, lod_id, val_id, miplevel_count_id; @@ -13810,7 +19022,7 @@ index 5c87ff15503..c4e712b8471 100644 uint32_t constituents[VKD3D_VEC4_SIZE]; unsigned int i, size_component_count; struct vkd3d_shader_image image; -@@ -9146,10 +9385,16 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, +@@ -9146,10 +9479,16 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_composite_construct(builder, type_id, constituents, i + 2); @@ -13828,7 +19040,7 @@ index 5c87ff15503..c4e712b8471 100644 } else { -@@ -9158,7 +9403,7 @@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, +@@ -9158,7 +9497,7 
@@ static void spirv_compiler_emit_resinfo(struct spirv_compiler *compiler, val_id = vkd3d_spirv_build_op_convert_utof(builder, type_id, val_id); } val_id = spirv_compiler_emit_swizzle(compiler, val_id, VKD3DSP_WRITEMASK_ALL, @@ -13837,7 +19049,200 @@ index 5c87ff15503..c4e712b8471 100644 spirv_compiler_emit_store_dst(compiler, dst, val_id); } -@@ -9475,6 +9720,11 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) +@@ -9468,6 +9807,192 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, + vkd3d_spirv_build_op_end_primitive(builder); + } + ++static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) ++{ ++ switch (handler_idx) ++ { ++ case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: ++ return SpvOpGroupNonUniformAllEqual; ++ case VKD3DSIH_WAVE_ALL_TRUE: ++ return SpvOpGroupNonUniformAll; ++ case VKD3DSIH_WAVE_ANY_TRUE: ++ return SpvOpGroupNonUniformAny; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, val_id; ++ SpvOp op; ++ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); ++ ++ op = map_wave_bool_op(instruction->handler_idx); ++ type_id = vkd3d_spirv_get_op_type_bool(builder); ++ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); ++ val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, ++ type_id, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static uint32_t spirv_compiler_emit_group_nonuniform_ballot(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_src_param *src) ++{ ++ struct vkd3d_spirv_builder *builder = 
&compiler->spirv_builder; ++ uint32_t type_id, val_id; ++ ++ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, VKD3D_VEC4_SIZE); ++ val_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); ++ val_id = vkd3d_spirv_build_op_group_nonuniform_ballot(builder, type_id, val_id); ++ ++ return val_id; ++} ++ ++static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ uint32_t val_id; ++ ++ val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode handler_idx, bool is_float) ++{ ++ switch (handler_idx) ++ { ++ case VKD3DSIH_WAVE_ACTIVE_BIT_AND: ++ return SpvOpGroupNonUniformBitwiseAnd; ++ case VKD3DSIH_WAVE_ACTIVE_BIT_OR: ++ return SpvOpGroupNonUniformBitwiseOr; ++ case VKD3DSIH_WAVE_ACTIVE_BIT_XOR: ++ return SpvOpGroupNonUniformBitwiseXor; ++ case VKD3DSIH_WAVE_OP_ADD: ++ return is_float ? SpvOpGroupNonUniformFAdd : SpvOpGroupNonUniformIAdd; ++ case VKD3DSIH_WAVE_OP_IMAX: ++ return SpvOpGroupNonUniformSMax; ++ case VKD3DSIH_WAVE_OP_IMIN: ++ return SpvOpGroupNonUniformSMin; ++ case VKD3DSIH_WAVE_OP_MAX: ++ return SpvOpGroupNonUniformFMax; ++ case VKD3DSIH_WAVE_OP_MIN: ++ return SpvOpGroupNonUniformFMin; ++ case VKD3DSIH_WAVE_OP_MUL: ++ return is_float ? 
SpvOpGroupNonUniformFMul : SpvOpGroupNonUniformIMul; ++ case VKD3DSIH_WAVE_OP_UMAX: ++ return SpvOpGroupNonUniformUMax; ++ case VKD3DSIH_WAVE_OP_UMIN: ++ return SpvOpGroupNonUniformUMin; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, val_id; ++ SpvOp op; ++ ++ op = map_wave_alu_op(instruction->handler_idx, data_type_is_floating_point(src->reg.data_type)); ++ ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, ++ vsir_write_mask_component_count(dst->write_mask)); ++ val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); ++ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformArithmetic); ++ val_id = vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, op, type_id, ++ vkd3d_spirv_get_op_scope_subgroup(builder), ++ (instruction->flags & VKD3DSI_WAVE_PREFIX) ? SpvGroupOperationExclusiveScan : SpvGroupOperationReduce, ++ val_id); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ SpvGroupOperation group_op; ++ uint32_t type_id, val_id; ++ ++ group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? 
SpvGroupOperationExclusiveScan ++ : SpvGroupOperationReduce; ++ ++ val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); ++ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); ++ val_id = vkd3d_spirv_build_op_group_nonuniform_ballot_bit_count(builder, type_id, group_op, val_id); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static void spirv_compiler_emit_wave_is_first_lane(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ uint32_t val_id; ++ ++ val_id = vkd3d_spirv_build_op_group_nonuniform_elect(builder); ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static void spirv_compiler_emit_wave_read_lane_at(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, lane_id, val_id; ++ ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, ++ vsir_write_mask_component_count(dst->write_mask)); ++ val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); ++ lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); ++ ++ /* TODO: detect values loaded from a const buffer? */ ++ if (register_is_constant_or_undef(&src[1].reg)) ++ { ++ /* Uniform lane_id only. */ ++ val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast(builder, type_id, val_id, lane_id); ++ } ++ else ++ { ++ /* WaveReadLaneAt supports non-uniform lane ids, so if lane_id is not constant it may not be uniform. 
*/ ++ val_id = vkd3d_spirv_build_op_group_nonuniform_shuffle(builder, type_id, val_id, lane_id); ++ } ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static void spirv_compiler_emit_wave_read_lane_first(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, val_id; ++ ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, ++ vsir_write_mask_component_count(dst->write_mask)); ++ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); ++ val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast_first(builder, type_id, val_id); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ + /* This function is called after declarations are processed. */ + static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) + { +@@ -9475,6 +10000,11 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) if (compiler->emit_point_size) spirv_compiler_emit_point_size(compiler); @@ -13849,7 +19254,16 @@ index 5c87ff15503..c4e712b8471 100644 } static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, -@@ -9549,6 +9799,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9482,6 +10012,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + { + int ret = VKD3D_OK; + ++ compiler->location = instruction->location; ++ + switch (instruction->handler_idx) + { + case VKD3DSIH_DCL_GLOBAL_FLAGS: +@@ -9549,6 +10081,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, break; case VKD3DSIH_DMOVC: case VKD3DSIH_MOVC: @@ -13857,7 +19271,7 @@ index 5c87ff15503..c4e712b8471 100644 spirv_compiler_emit_movc(compiler, instruction); 
break; case VKD3DSIH_SWAPC: -@@ -9587,6 +9838,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9587,6 +10120,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_ISFINITE: spirv_compiler_emit_isfinite(compiler, instruction); break; @@ -13865,7 +19279,7 @@ index 5c87ff15503..c4e712b8471 100644 case VKD3DSIH_ACOS: case VKD3DSIH_ASIN: case VKD3DSIH_ATAN: -@@ -9669,6 +9921,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9669,6 +10203,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_ULT: spirv_compiler_emit_comparison_instruction(compiler, instruction); break; @@ -13880,21 +19294,65 @@ index 5c87ff15503..c4e712b8471 100644 case VKD3DSIH_BFI: case VKD3DSIH_IBFE: case VKD3DSIH_UBFE: -@@ -9796,7 +10056,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9795,8 +10337,41 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_CUT_STREAM: spirv_compiler_emit_cut_stream(compiler, instruction); break; ++ case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: ++ case VKD3DSIH_WAVE_ALL_TRUE: ++ case VKD3DSIH_WAVE_ANY_TRUE: ++ spirv_compiler_emit_wave_bool_op(compiler, instruction); ++ break; ++ case VKD3DSIH_WAVE_ACTIVE_BALLOT: ++ spirv_compiler_emit_wave_active_ballot(compiler, instruction); ++ break; ++ case VKD3DSIH_WAVE_ACTIVE_BIT_AND: ++ case VKD3DSIH_WAVE_ACTIVE_BIT_OR: ++ case VKD3DSIH_WAVE_ACTIVE_BIT_XOR: ++ case VKD3DSIH_WAVE_OP_ADD: ++ case VKD3DSIH_WAVE_OP_IMAX: ++ case VKD3DSIH_WAVE_OP_IMIN: ++ case VKD3DSIH_WAVE_OP_MAX: ++ case VKD3DSIH_WAVE_OP_MIN: ++ case VKD3DSIH_WAVE_OP_MUL: ++ case VKD3DSIH_WAVE_OP_UMAX: ++ case VKD3DSIH_WAVE_OP_UMIN: ++ spirv_compiler_emit_wave_alu_op(compiler, instruction); ++ break; ++ case VKD3DSIH_WAVE_ALL_BIT_COUNT: ++ case VKD3DSIH_WAVE_PREFIX_BIT_COUNT: ++ spirv_compiler_emit_wave_bit_count(compiler, 
instruction); ++ break; ++ case VKD3DSIH_WAVE_IS_FIRST_LANE: ++ spirv_compiler_emit_wave_is_first_lane(compiler, instruction); ++ break; ++ case VKD3DSIH_WAVE_READ_LANE_AT: ++ spirv_compiler_emit_wave_read_lane_at(compiler, instruction); ++ break; ++ case VKD3DSIH_WAVE_READ_LANE_FIRST: ++ spirv_compiler_emit_wave_read_lane_first(compiler, instruction); ++ break; case VKD3DSIH_DCL: - case VKD3DSIH_DCL_CONSTANT_BUFFER: case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: case VKD3DSIH_DCL_INPUT_SGV: -@@ -9899,13 +10158,14 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +@@ -9892,20 +10467,19 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c + } + } + +-static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, +- struct vkd3d_shader_code *spirv) ++static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct vsir_program *program, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *spirv) + { const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; struct vkd3d_shader_instruction_array instructions; - struct vsir_program *program = &parser->program; +- struct vsir_program *program = &parser->program; + enum vkd3d_shader_spirv_environment environment; enum vkd3d_result result = VKD3D_OK; unsigned int i; @@ -13905,7 +19363,16 @@ index 5c87ff15503..c4e712b8471 100644 return result; if (program->temp_count) -@@ -9924,12 +10184,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +@@ -9915,21 +10489,18 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + + 
spirv_compiler_emit_descriptor_declarations(compiler); + +- compiler->location.column = 0; +- compiler->location.line = 1; +- + if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + instructions = program->instructions; memset(&program->instructions, 0, sizeof(program->instructions)); @@ -13924,7 +19391,15 @@ index 5c87ff15503..c4e712b8471 100644 compiler->use_vocp = program->use_vocp; compiler->block_names = program->block_names; compiler->block_name_count = program->block_name_count; -@@ -9985,12 +10245,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +@@ -9942,7 +10513,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + + for (i = 0; i < instructions.count && result >= 0; ++i) + { +- compiler->location.line = i + 1; + result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); + } + +@@ -9985,12 +10555,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, if (compiler->strip_debug) vkd3d_spirv_stream_clear(&builder->debug_stream); @@ -13933,13 +19408,14 @@ index 5c87ff15503..c4e712b8471 100644 + if (!vkd3d_spirv_compile_module(builder, spirv, spirv_compiler_get_entry_point_name(compiler), environment)) return VKD3D_ERROR; - if (TRACE_ON() || parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) +- if (TRACE_ON() || parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) ++ if (TRACE_ON() || compiler->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) { - enum vkd3d_shader_spirv_environment environment = spirv_compiler_get_target_environment(compiler); struct vkd3d_string_buffer buffer; if (TRACE_ON()) -@@ -10018,7 +10278,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +@@ -10018,7 +10588,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, if (compile_info->target_type == 
VKD3D_SHADER_TARGET_SPIRV_TEXT) { struct vkd3d_shader_code text; @@ -13947,27 +19423,76 @@ index 5c87ff15503..c4e712b8471 100644 if (vkd3d_spirv_binary_to_text(spirv, environment, compiler->formatting, &text) != VKD3D_OK) return VKD3D_ERROR; vkd3d_shader_free_shader_code(spirv); -@@ -10036,8 +10295,8 @@ int spirv_compile(struct vkd3d_shader_parser *parser, +@@ -10028,7 +10597,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + return VKD3D_OK; + } + +-int spirv_compile(struct vkd3d_shader_parser *parser, ++int spirv_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +@@ -10036,14 +10605,14 @@ int spirv_compile(struct vkd3d_shader_parser *parser, struct spirv_compiler *spirv_compiler; int ret; - if (!(spirv_compiler = spirv_compiler_create(&parser->program.shader_version, &parser->shader_desc, - compile_info, scan_descriptor_info, message_context, &parser->location, parser->config_flags))) -+ if (!(spirv_compiler = spirv_compiler_create(&parser->program, compile_info, -+ scan_descriptor_info, message_context, &parser->location, parser->config_flags))) ++ if (!(spirv_compiler = spirv_compiler_create(program, compile_info, ++ scan_descriptor_info, message_context, config_flags))) { ERR("Failed to create SPIR-V compiler.\n"); return VKD3D_ERROR; + } + +- ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); ++ ret = spirv_compiler_generate_spirv(spirv_compiler, program, compile_info, out); + + spirv_compiler_destroy(spirv_compiler); + return ret; diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 3be4e40ab0c..6ee06c02d74 100644 +index 3be4e40ab0c..b562e815a81 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ 
-954,32 +954,32 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins +@@ -719,14 +719,9 @@ static const enum vkd3d_data_type data_type_table[] = + /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, + }; + +-static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) +-{ +- return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); +-} +- + static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) + { +- const struct vkd3d_shader_version *version = &sm4->p.program.shader_version; ++ const struct vkd3d_shader_version *version = &sm4->p.program->shader_version; + + return version->major >= 5 && version->minor >= 1; + } +@@ -811,7 +806,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui + icb->element_count = icb_size / VKD3D_VEC4_SIZE; + icb->is_null = false; + memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); +- shader_instruction_array_add_icb(&priv->p.program.instructions, icb); ++ shader_instruction_array_add_icb(&priv->p.program->instructions, icb); + ins->declaration.icb = icb; + } + +@@ -933,6 +928,7 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) + { + struct vkd3d_shader_index_range *index_range = &ins->declaration.index_range; ++ struct vsir_program *program = priv->p.program; + unsigned int i, register_idx, register_count; + const struct shader_signature *signature; + enum vkd3d_shader_register_type type; +@@ -954,32 +950,32 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins case VKD3DSPR_INCONTROLPOINT: io_masks = priv->input_register_masks; ranges = &priv->input_index_ranges; - signature = &priv->p.shader_desc.input_signature; -+ signature = &priv->p.program.input_signature; ++ signature = &program->input_signature; break; case VKD3DSPR_OUTPUT: if 
(sm4_parser_is_in_fork_or_join_phase(priv)) @@ -13975,14 +19500,14 @@ index 3be4e40ab0c..6ee06c02d74 100644 io_masks = priv->patch_constant_register_masks; ranges = &priv->patch_constant_index_ranges; - signature = &priv->p.shader_desc.patch_constant_signature; -+ signature = &priv->p.program.patch_constant_signature; ++ signature = &program->patch_constant_signature; } else { io_masks = priv->output_register_masks; ranges = &priv->output_index_ranges; - signature = &priv->p.shader_desc.output_signature; -+ signature = &priv->p.program.output_signature; ++ signature = &program->output_signature; } break; case VKD3DSPR_COLOROUT: @@ -13990,35 +19515,101 @@ index 3be4e40ab0c..6ee06c02d74 100644 io_masks = priv->output_register_masks; ranges = &priv->output_index_ranges; - signature = &priv->p.shader_desc.output_signature; -+ signature = &priv->p.program.output_signature; ++ signature = &program->output_signature; break; case VKD3DSPR_PATCHCONST: io_masks = priv->patch_constant_register_masks; ranges = &priv->patch_constant_index_ranges; - signature = &priv->p.shader_desc.patch_constant_signature; -+ signature = &priv->p.program.patch_constant_signature; ++ signature = &program->patch_constant_signature; break; default: -@@ -1113,7 +1113,7 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u +@@ -1057,16 +1053,17 @@ static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction + } + + static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) + { + enum vkd3d_sm4_input_primitive_type primitive_type; ++ struct vsir_program *program = sm4->p.program; + + primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; + if 
(VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) + { + ins->declaration.primitive_type.type = VKD3D_PT_PATCH; + ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; +- priv->p.program.input_control_point_count = ins->declaration.primitive_type.patch_vertex_count; ++ program->input_control_point_count = ins->declaration.primitive_type.patch_vertex_count; + } + else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) + { +@@ -1075,7 +1072,7 @@ static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction + else + { + ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type].vkd3d_type; +- priv->p.program.input_control_point_count = input_primitive_type_table[primitive_type].control_point_count; ++ program->input_control_point_count = input_primitive_type_table[primitive_type].control_point_count; + } + + if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) +@@ -1083,11 +1080,13 @@ static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction + } + + static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) + { ++ struct vsir_program *program = sm4->p.program; ++ + ins->declaration.count = *tokens; + if (opcode == VKD3D_SM4_OP_DCL_TEMPS) +- priv->p.program.temp_count = max(priv->p.program.temp_count, *tokens); ++ program->temp_count = max(program->temp_count, *tokens); + } + + static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1113,7 +1112,7 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u if (shader_sm4_read_dst_param(priv, &tokens, 
&tokens[token_count], VKD3D_DATA_FLOAT, dst)) { struct signature_element *e = vsir_signature_find_element_for_reg( - &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); -+ &priv->p.program.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); ++ &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); e->interpolation_mode = ins->flags; } -@@ -1128,7 +1128,7 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in +@@ -1128,7 +1127,7 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) { struct signature_element *e = vsir_signature_find_element_for_reg( - &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); -+ &priv->p.program.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); ++ &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); e->interpolation_mode = ins->flags; } -@@ -1263,6 +1263,7 @@ static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, u +@@ -1183,15 +1182,17 @@ static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, + } + + static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) + { ++ struct vsir_program *program = sm4->p.program; ++ + ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) + >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; + + if (opcode == VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT) +- priv->p.program.input_control_point_count 
= ins->declaration.count; ++ program->input_control_point_count = ins->declaration.count; + else +- priv->p.program.output_control_point_count = ins->declaration.count; ++ program->output_control_point_count = ins->declaration.count; + } + + static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1263,6 +1264,7 @@ static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, u ins->declaration.tgsm_raw.byte_count = *tokens; if (ins->declaration.tgsm_raw.byte_count % 4) FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); @@ -14026,7 +19617,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 } static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1274,6 +1275,7 @@ static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction +@@ -1274,6 +1276,7 @@ static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction ins->declaration.tgsm_structured.structure_count = *tokens; if (ins->declaration.tgsm_structured.byte_stride % 4) FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); @@ -14034,38 +19625,98 @@ index 3be4e40ab0c..6ee06c02d74 100644 } static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1748,7 +1750,6 @@ static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - - vsir_program_cleanup(&parser->program); -- free_shader_desc(&parser->shader_desc); - vkd3d_free(sm4); +@@ -1743,21 +1746,12 @@ static enum vkd3d_data_type map_data_type(char t) + } } -@@ -2504,7 +2505,7 @@ static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = - }; +-static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) +-{ +- struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); +- +- 
vsir_program_cleanup(&parser->program); +- free_shader_desc(&parser->shader_desc); +- vkd3d_free(sm4); +-} +- + static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) + { + if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) + { +- struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(&priv->p.program, 1); ++ struct vkd3d_shader_src_param *rel_addr = vsir_program_get_src_params(priv->p.program, 1); - static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, + if (!(reg_idx->rel_addr = rel_addr)) + { +@@ -2035,7 +2029,7 @@ static bool register_is_control_point_input(const struct vkd3d_shader_register * + { + return reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT + || (reg->type == VKD3DSPR_INPUT && (priv->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE +- || priv->p.program.shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY)); ++ || priv->p.program->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY)); + } + + static uint32_t mask_from_swizzle(uint32_t swizzle) +@@ -2359,7 +2353,7 @@ static void shader_sm4_read_instruction_modifier(uint32_t modifier, struct vkd3d + static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_instruction *ins) + { + const struct vkd3d_sm4_opcode_info *opcode_info; +- struct vsir_program *program = &sm4->p.program; ++ struct vsir_program *program = sm4->p.program; + uint32_t opcode_token, opcode, previous_token; + struct vkd3d_shader_dst_param *dst_params; + struct vkd3d_shader_src_param *src_params; +@@ -2498,13 +2492,8 @@ fail: + return; + } + +-static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = +-{ +- .parser_destroy = shader_sm4_destroy, +-}; +- +-static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, - size_t byte_code_size, const char 
*source_name, const struct shader_signature *output_signature, -+ size_t byte_code_size, const char *source_name, ++static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_program *program, ++ const uint32_t *byte_code, size_t byte_code_size, const char *source_name, struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_version version; -@@ -2648,9 +2649,9 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +@@ -2563,9 +2552,9 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t + version.minor = VKD3D_SM4_VERSION_MINOR(version_token); + + /* Estimate instruction count to avoid reallocation in most shaders. */ +- if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, +- token_count / 7u + 20)) ++ if (!vsir_program_init(program, &version, token_count / 7u + 20)) + return false; ++ vkd3d_shader_parser_init(&sm4->p, program, message_context, source_name); + sm4->ptr = sm4->start; + + init_sm4_lookup_tables(&sm4->lookup); +@@ -2644,94 +2633,88 @@ static void shader_sm4_validate_default_phase_index_ranges(struct vkd3d_shader_s + return; + } + +-int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) ++int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, ++ struct vkd3d_shader_message_context *message_context, struct vsir_program *program) { struct vkd3d_shader_instruction_array *instructions; - struct vkd3d_shader_desc *shader_desc; - struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_sm4_parser *sm4; ++ struct vkd3d_shader_sm4_parser sm4 = {0}; + struct dxbc_shader_desc dxbc_desc = {0}; + struct vkd3d_shader_instruction *ins; +- struct 
vkd3d_shader_sm4_parser *sm4; int ret; - if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) -@@ -2659,36 +2660,40 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - return VKD3D_ERROR_OUT_OF_MEMORY; - } - +- if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) +- { +- ERR("Failed to allocate parser.\n"); +- return VKD3D_ERROR_OUT_OF_MEMORY; +- } +- - shader_desc = &sm4->p.shader_desc; - shader_desc->is_dxil = false; + dxbc_desc.is_dxil = false; @@ -14074,56 +19725,128 @@ index 3be4e40ab0c..6ee06c02d74 100644 + message_context, compile_info->source_name, &dxbc_desc)) < 0) { WARN("Failed to extract shader, vkd3d result %d.\n", ret); - vkd3d_free(sm4); +- vkd3d_free(sm4); return ret; } - if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, - compile_info->source_name, &shader_desc->output_signature, message_context)) -+ if (!shader_sm4_init(sm4, dxbc_desc.byte_code, dxbc_desc.byte_code_size, ++ if (!shader_sm4_init(&sm4, program, dxbc_desc.byte_code, dxbc_desc.byte_code_size, + compile_info->source_name, message_context)) { WARN("Failed to initialise shader parser.\n"); - free_shader_desc(shader_desc); +- vkd3d_free(sm4); + free_dxbc_shader_desc(&dxbc_desc); - vkd3d_free(sm4); return VKD3D_ERROR_INVALID_ARGUMENT; } -+ sm4->p.program.input_signature = dxbc_desc.input_signature; -+ sm4->p.program.output_signature = dxbc_desc.output_signature; -+ sm4->p.program.patch_constant_signature = dxbc_desc.patch_constant_signature; ++ program->input_signature = dxbc_desc.input_signature; ++ program->output_signature = dxbc_desc.output_signature; ++ program->patch_constant_signature = dxbc_desc.patch_constant_signature; + memset(&dxbc_desc, 0, sizeof(dxbc_desc)); + /* DXBC stores used masks inverted for output signatures, for some reason. * We return them un-inverted. 
*/ - uninvert_used_masks(&shader_desc->output_signature); -+ uninvert_used_masks(&sm4->p.program.output_signature); - if (sm4->p.program.shader_version.type == VKD3D_SHADER_TYPE_HULL) +- if (sm4->p.program.shader_version.type == VKD3D_SHADER_TYPE_HULL) - uninvert_used_masks(&shader_desc->patch_constant_signature); -+ uninvert_used_masks(&sm4->p.program.patch_constant_signature); - +- - if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature, -+ if (!shader_sm4_parser_validate_signature(sm4, &sm4->p.program.input_signature, - sm4->input_register_masks, "Input") +- sm4->input_register_masks, "Input") - || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature, -+ || !shader_sm4_parser_validate_signature(sm4, &sm4->p.program.output_signature, - sm4->output_register_masks, "Output") +- sm4->output_register_masks, "Output") - || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature, -+ || !shader_sm4_parser_validate_signature(sm4, &sm4->p.program.patch_constant_signature, - sm4->patch_constant_register_masks, "Patch constant")) - { - shader_sm4_destroy(&sm4->p); -@@ -2721,7 +2726,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - shader_sm4_validate_default_phase_index_ranges(sm4); +- sm4->patch_constant_register_masks, "Patch constant")) +- { +- shader_sm4_destroy(&sm4->p); ++ uninvert_used_masks(&program->output_signature); ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) ++ uninvert_used_masks(&program->patch_constant_signature); ++ ++ if (!shader_sm4_parser_validate_signature(&sm4, &program->input_signature, ++ sm4.input_register_masks, "Input") ++ || !shader_sm4_parser_validate_signature(&sm4, &program->output_signature, ++ sm4.output_register_masks, "Output") ++ || !shader_sm4_parser_validate_signature(&sm4, &program->patch_constant_signature, ++ sm4.patch_constant_register_masks, "Patch constant")) ++ { ++ vsir_program_cleanup(program); + return 
VKD3D_ERROR_INVALID_SHADER; + } - if (!sm4->p.failed) +- instructions = &sm4->p.program.instructions; +- while (sm4->ptr != sm4->end) ++ instructions = &program->instructions; ++ while (sm4.ptr != sm4.end) + { + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) + { + ERR("Failed to allocate instructions.\n"); +- vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); +- shader_sm4_destroy(&sm4->p); ++ vkd3d_shader_parser_error(&sm4.p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); ++ vsir_program_cleanup(program); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ins = &instructions->elements[instructions->count]; +- shader_sm4_read_instruction(sm4, ins); ++ shader_sm4_read_instruction(&sm4, ins); + + if (ins->handler_idx == VKD3DSIH_INVALID) + { + WARN("Encountered unrecognized or invalid instruction.\n"); +- shader_sm4_destroy(&sm4->p); ++ vsir_program_cleanup(program); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ++instructions->count; + } +- if (sm4->p.program.shader_version.type == VKD3D_SHADER_TYPE_HULL +- && !sm4->has_control_point_phase && !sm4->p.failed) +- shader_sm4_validate_default_phase_index_ranges(sm4); ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL ++ && !sm4.has_control_point_phase && !sm4.p.failed) ++ shader_sm4_validate_default_phase_index_ranges(&sm4); + +- if (!sm4->p.failed) - vsir_validate(&sm4->p); -+ vkd3d_shader_parser_validate(&sm4->p); ++ if (!sm4.p.failed) ++ vkd3d_shader_parser_validate(&sm4.p, config_flags); - if (sm4->p.failed) +- if (sm4->p.failed) ++ if (sm4.p.failed) { -@@ -2989,26 +2994,28 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) + WARN("Failed to parse shader.\n"); +- shader_sm4_destroy(&sm4->p); ++ vsir_program_cleanup(program); + return VKD3D_ERROR_INVALID_SHADER; + } + +- *parser = &sm4->p; +- + return VKD3D_OK; + } + +@@ -2739,7 +2722,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct 
hlsl_bloc + + static bool type_is_integer(const struct hlsl_type *type) + { +- switch (type->base_type) ++ switch (type->e.numeric.type) + { + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: +@@ -2928,7 +2911,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, usage_idx); + put_u32(&buffer, usage); +- switch (var->data_type->base_type) ++ switch (var->data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +@@ -2989,31 +2972,39 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) { switch (type->class) { @@ -14148,12 +19871,18 @@ index 3be4e40ab0c..6ee06c02d74 100644 - vkd3d_unreachable(); + + case HLSL_CLASS_ARRAY: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_STRUCT: -+ case HLSL_CLASS_OBJECT: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: + break; } @@ -14161,7 +19890,13 @@ index 3be4e40ab0c..6ee06c02d74 100644 } static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) -@@ -3024,68 +3031,8 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) + { +- switch (type->base_type) ++ switch (type->e.numeric.type) + { + case HLSL_TYPE_BOOL: + return D3D_SVT_BOOL; +@@ -3024,68 +3015,8 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) return D3D_SVT_FLOAT; case HLSL_TYPE_INT: return D3D_SVT_INT; @@ -14230,7 +19965,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 default: vkd3d_unreachable(); } -@@ -3096,8 +3043,8 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b +@@ -3096,8 +3027,8 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b const 
struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); const char *name = array_type->name ? array_type->name : ""; const struct hlsl_profile_info *profile = ctx->profile; @@ -14241,7 +19976,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 size_t i; if (type->bytecode_offset) -@@ -3111,32 +3058,47 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b +@@ -3111,32 +3042,47 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b if (array_type->class == HLSL_CLASS_STRUCT) { @@ -14274,7 +20009,8 @@ index 3be4e40ab0c..6ee06c02d74 100644 + put_u32(buffer, field->name_bytecode_offset); put_u32(buffer, field->type->bytecode_offset); - put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); +- put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); ++ put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float)); } + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); + put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); @@ -14297,7 +20033,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 if (profile->major_version >= 5) { -@@ -3150,20 +3112,21 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b +@@ -3150,20 +3096,21 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) { @@ -14327,7 +20063,16 @@ index 3be4e40ab0c..6ee06c02d74 100644 } static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) -@@ -3328,7 +3291,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un +@@ -3171,7 +3118,7 @@ static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type + if (type->class == HLSL_CLASS_ARRAY) + return sm4_resource_format(type->e.array.type); + +- switch (type->e.resource.format->base_type) ++ switch (type->e.resource.format->e.numeric.type) + 
{ + case HLSL_TYPE_DOUBLE: + return D3D_RETURN_TYPE_DOUBLE; +@@ -3328,7 +3275,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un extern_resources[*count].name = name; extern_resources[*count].data_type = component_type; @@ -14336,7 +20081,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 extern_resources[*count].regset = regset; extern_resources[*count].id = var->regs[regset].id + regset_offset; -@@ -3428,10 +3391,10 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3428,10 +3375,10 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) if (profile->major_version >= 5) { @@ -14349,7 +20094,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ -@@ -3448,6 +3411,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3448,6 +3395,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) const struct extern_resource *resource = &extern_resources[i]; uint32_t flags = 0; @@ -14359,7 +20104,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 if (resource->is_user_packed) flags |= D3D_SIF_USERPACKED; -@@ -3480,6 +3446,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3480,6 +3430,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) if (!cbuffer->reg.allocated) continue; @@ -14369,7 +20114,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 if (cbuffer->reservation.reg_type) flags |= D3D_SIF_USERPACKED; -@@ -3523,8 +3492,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3523,8 +3476,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -14379,7 +20124,7 @@ index 
3be4e40ab0c..6ee06c02d74 100644 ++var_count; } -@@ -3558,8 +3526,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3558,8 +3510,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -14389,7 +20134,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 { uint32_t flags = 0; -@@ -3586,8 +3553,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3586,8 +3537,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) j = 0; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -14399,7 +20144,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 { const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6); size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); -@@ -4598,7 +4564,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node +@@ -4598,7 +4548,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node enum hlsl_sampler_dim dim) { const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); @@ -14408,7 +20153,237 @@ index 3be4e40ab0c..6ee06c02d74 100644 && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; -@@ -5389,7 +5355,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex +@@ -4756,11 +4706,11 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl + const struct hlsl_ir_node *dst = &load->node; + struct sm4_instruction instr; + +- assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); ++ assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || 
dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; +- if (dst->data_type->base_type == HLSL_TYPE_UINT) ++ if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) + instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + + sm4_dst_from_node(&instr.dsts[0], dst); +@@ -4785,11 +4735,11 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir + return; + } + +- assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); ++ assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_RESINFO; +- if (dst->data_type->base_type == HLSL_TYPE_UINT) ++ if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) + instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + + sm4_dst_from_node(&instr.dsts[0], dst); +@@ -4804,7 +4754,7 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir + + static bool type_is_float(const struct hlsl_type *type) + { +- return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; ++ return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; + } + + static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, +@@ -4841,11 +4791,11 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex + /* Narrowing casts were already lowered. 
*/ + assert(src_type->dimx == dst_type->dimx); + +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- switch (src_type->base_type) ++ switch (src_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +@@ -4874,7 +4824,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_TYPE_INT: +- switch (src_type->base_type) ++ switch (src_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +@@ -4900,7 +4850,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_TYPE_UINT: +- switch (src_type->base_type) ++ switch (src_type->e.numeric.type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +@@ -4970,7 +4920,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + switch (expr->op) + { + case HLSL_OP1_ABS: +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_ABS); +@@ -5051,12 +5001,12 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_OP1_LOGIC_NOT: +- assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_NEG: +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_NEG); +@@ -5109,7 +5059,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_OP2_ADD: +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); +@@ -5141,7 +5091,7 @@ static void 
write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_OP2_DIV: +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); +@@ -5157,7 +5107,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_OP2_DOT: +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + switch (arg1->data_type->dimx) +@@ -5189,9 +5139,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + { + const struct hlsl_type *src_type = arg1->data_type; + +- assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + +- switch (src_type->base_type) ++ switch (src_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); +@@ -5215,9 +5165,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + { + const struct hlsl_type *src_type = arg1->data_type; + +- assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + +- switch (src_type->base_type) ++ switch (src_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); +@@ -5244,9 +5194,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + { + const struct hlsl_type *src_type = arg1->data_type; + +- assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + +- switch (src_type->base_type) ++ switch (src_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); +@@ -5270,23 +5220,23 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + } + + case HLSL_OP2_LOGIC_AND: +- 
assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LOGIC_OR: +- assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LSHIFT: + assert(type_is_integer(dst_type)); +- assert(dst_type->base_type != HLSL_TYPE_BOOL); ++ assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MAX: +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); +@@ -5306,7 +5256,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_OP2_MIN: +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); +@@ -5326,7 +5276,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_OP2_MOD: +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_UINT: + write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); +@@ -5338,7 +5288,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_OP2_MUL: +- switch (dst_type->base_type) ++ switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); +@@ -5360,9 +5310,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + { + const struct hlsl_type *src_type = arg1->data_type; + +- assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ 
assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + +- switch (src_type->base_type) ++ switch (src_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); +@@ -5384,12 +5334,12 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + + case HLSL_OP2_RSHIFT: + assert(type_is_integer(dst_type)); +- assert(dst_type->base_type != HLSL_TYPE_BOOL); +- write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, ++ assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); ++ write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, &expr->node, arg1, arg2); break; @@ -14417,7 +20392,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); break; -@@ -5445,7 +5411,8 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju +@@ -5445,7 +5395,8 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju case HLSL_IR_JUMP_DISCARD_NZ: { @@ -14427,7 +20402,16 @@ index 3be4e40ab0c..6ee06c02d74 100644 memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); instr.src_count = 1; -@@ -5746,18 +5713,12 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc +@@ -5486,7 +5437,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo + instr.dst_count = 1; + + assert(hlsl_is_numeric_type(type)); +- if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) ++ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) + { + struct hlsl_constant_value value; + +@@ -5746,18 +5697,12 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc { if (instr->data_type) { @@ -14449,7 +20433,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 if (!instr->reg.allocated) { -@@ -5854,13 
+5815,21 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, +@@ -5854,13 +5799,21 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) @@ -14472,7 +20456,7 @@ index 3be4e40ab0c..6ee06c02d74 100644 write_sm4_dcl_samplers(&tpf, resource); else if (resource->regset == HLSL_REGSET_TEXTURES) diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 4f400d19f6f..cb37efb53f7 100644 +index 4f400d19f6f..b8dd0dba377 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ @@ -14520,22 +20504,100 @@ index 4f400d19f6f..cb37efb53f7 100644 void vkd3d_shader_message_context_init(struct vkd3d_shader_message_context *context, enum vkd3d_shader_log_level log_level) { -@@ -1438,11 +1458,11 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info +@@ -520,7 +540,7 @@ static const struct vkd3d_debug_option vkd3d_shader_config_options[] = + {"force_validation", VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION}, /* force validation of internal shader representations */ + }; + +-static uint64_t vkd3d_shader_init_config_flags(void) ++uint64_t vkd3d_shader_init_config_flags(void) + { + uint64_t config_flags; + const char *config; +@@ -534,18 +554,14 @@ static uint64_t vkd3d_shader_init_config_flags(void) + return config_flags; + } + +-bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, +- struct vkd3d_shader_message_context *message_context, const char *source_name, +- const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, +- unsigned int instruction_reserve) ++void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context, const char *source_name) + { + parser->message_context = message_context; + 
parser->location.source_name = source_name; + parser->location.line = 1; + parser->location.column = 0; +- parser->ops = ops; +- parser->config_flags = vkd3d_shader_init_config_flags(); +- return vsir_program_init(&parser->program, version, instruction_reserve); ++ parser->program = program; + } + + void VKD3D_PRINTF_FUNC(3, 4) vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, +@@ -1375,9 +1391,9 @@ static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_des + vkd3d_free(scan_descriptor_info->descriptors); + } + +-static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, ++static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, +- struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) ++ struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) + { + struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; + struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; +@@ -1408,27 +1424,27 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + descriptor_info1 = &local_descriptor_info1; + } + +- vkd3d_shader_scan_context_init(&context, &parser->program.shader_version, compile_info, ++ vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, + descriptor_info1, combined_sampler_info, message_context); + + if (TRACE_ON()) +- vkd3d_shader_trace(&parser->program); ++ vkd3d_shader_trace(program); + +- for (i = 0; i < parser->program.instructions.count; ++i) ++ for (i = 0; i < program->instructions.count; ++i) + { +- instruction = &parser->program.instructions.elements[i]; ++ instruction = &program->instructions.elements[i]; + if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) + break; + } + +- for (i = 0; i < 
ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) ++ for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) + { +- unsigned int size = parser->shader_desc.flat_constant_count[i].external; + struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; + struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; ++ unsigned int size = program->flat_constant_count[i]; + struct vkd3d_shader_descriptor_info1 *d; + +- if (parser->shader_desc.flat_constant_count[i].external) ++ if (size) + { + if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ®, + &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) +@@ -1438,11 +1454,11 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info if (!ret && signature_info) { - if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) -+ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->program.input_signature) ++ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &program->input_signature) || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, - &parser->shader_desc.output_signature) -+ &parser->program.output_signature) ++ &program->output_signature) || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, - &parser->shader_desc.patch_constant_signature)) -+ &parser->program.patch_constant_signature)) ++ &program->patch_constant_signature)) { ret = VKD3D_ERROR_OUT_OF_MEMORY; } -@@ -1470,60 +1490,6 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info +@@ -1470,60 +1486,6 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info return ret; } @@ -14596,7 +20658,7 @@ index 4f400d19f6f..cb37efb53f7 100644 int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) { struct 
vkd3d_shader_message_context message_context; -@@ -1543,29 +1509,44 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char +@@ -1543,29 +1505,45 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char vkd3d_shader_dump_shader(compile_info); @@ -14611,7 +20673,8 @@ index 4f400d19f6f..cb37efb53f7 100644 + } + else + { -+ struct vkd3d_shader_parser *parser; ++ uint64_t config_flags = vkd3d_shader_init_config_flags(); ++ struct vsir_program program; - case VKD3D_SHADER_SOURCE_HLSL: - FIXME("HLSL support not implemented.\n"); @@ -14620,21 +20683,21 @@ index 4f400d19f6f..cb37efb53f7 100644 + switch (compile_info->source_type) + { + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -+ ret = vkd3d_shader_sm1_parser_create(compile_info, &message_context, &parser); ++ ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); + break; - case VKD3D_SHADER_SOURCE_D3D_BYTECODE: - ret = scan_d3dbc(compile_info, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_TPF: -+ ret = vkd3d_shader_sm4_parser_create(compile_info, &message_context, &parser); ++ ret = tpf_parse(compile_info, config_flags, &message_context, &program); + break; - case VKD3D_SHADER_SOURCE_DXBC_DXIL: - ret = scan_dxil(compile_info, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = vkd3d_shader_sm6_parser_create(compile_info, &message_context, &parser); ++ ret = dxil_parse(compile_info, config_flags, &message_context, &program); + break; - default: @@ -14649,32 +20712,33 @@ index 4f400d19f6f..cb37efb53f7 100644 + + if (ret < 0) + { -+ WARN("Failed to create shader parser.\n"); ++ WARN("Failed to parse shader.\n"); + } + else + { -+ ret = scan_with_parser(compile_info, &message_context, NULL, parser); -+ vkd3d_shader_parser_destroy(parser); ++ ret = vsir_program_scan(&program, compile_info, &message_context, NULL); ++ vsir_program_cleanup(&program); + } } vkd3d_shader_message_context_trace_messages(&message_context); 
-@@ -1575,12 +1556,12 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char +@@ -1575,12 +1553,11 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char return ret; } -static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, -+int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +- const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) ++int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, ++ struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; - struct vkd3d_glsl_generator *glsl_generator; -+ struct vsir_program *program = &parser->program; struct vkd3d_shader_compile_info scan_info; int ret; -@@ -1589,22 +1570,13 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, +@@ -1589,30 +1566,22 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, switch (compile_info->target_type) { case VKD3D_SHADER_TARGET_D3D_ASM: @@ -14683,7 +20747,8 @@ index 4f400d19f6f..cb37efb53f7 100644 break; case VKD3D_SHADER_TARGET_GLSL: - if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) +- if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) ++ if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) return ret; - if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->program.shader_version, - message_context, &parser->location))) @@ -14695,11 +20760,22 @@ index 4f400d19f6f..cb37efb53f7 100644 - - ret = 
vkd3d_glsl_generator_generate(glsl_generator, &parser->program, out); - vkd3d_glsl_generator_destroy(glsl_generator); -+ ret = glsl_compile(program, parser->config_flags, compile_info, out, message_context); ++ ret = glsl_compile(program, config_flags, compile_info, out, message_context); vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; -@@ -1624,24 +1596,6 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + case VKD3D_SHADER_TARGET_SPIRV_BINARY: + case VKD3D_SHADER_TARGET_SPIRV_TEXT: +- if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) ++ if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) + return ret; +- ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); ++ ret = spirv_compile(program, config_flags, &scan_descriptor_info, ++ compile_info, out, message_context); + vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + break; + +@@ -1624,24 +1593,6 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, return ret; } @@ -14724,7 +20800,7 @@ index 4f400d19f6f..cb37efb53f7 100644 static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { -@@ -1657,42 +1611,6 @@ static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, +@@ -1657,42 +1608,6 @@ static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, return ret; } @@ -14767,7 +20843,7 @@ index 4f400d19f6f..cb37efb53f7 100644 int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, char **messages) { -@@ -1713,26 +1631,43 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, +@@ -1713,26 +1628,44 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, 
vkd3d_shader_dump_shader(compile_info); @@ -14781,7 +20857,8 @@ index 4f400d19f6f..cb37efb53f7 100644 + } + else + { -+ struct vkd3d_shader_parser *parser; ++ uint64_t config_flags = vkd3d_shader_init_config_flags(); ++ struct vsir_program program; - case VKD3D_SHADER_SOURCE_HLSL: - ret = compile_hlsl(compile_info, out, &message_context); @@ -14789,21 +20866,21 @@ index 4f400d19f6f..cb37efb53f7 100644 + switch (compile_info->source_type) + { + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -+ ret = vkd3d_shader_sm1_parser_create(compile_info, &message_context, &parser); ++ ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); + break; - case VKD3D_SHADER_SOURCE_D3D_BYTECODE: - ret = compile_d3d_bytecode(compile_info, out, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_TPF: -+ ret = vkd3d_shader_sm4_parser_create(compile_info, &message_context, &parser); ++ ret = tpf_parse(compile_info, config_flags, &message_context, &program); + break; - case VKD3D_SHADER_SOURCE_DXBC_DXIL: - ret = compile_dxbc_dxil(compile_info, out, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = vkd3d_shader_sm6_parser_create(compile_info, &message_context, &parser); ++ ret = dxil_parse(compile_info, config_flags, &message_context, &program); + break; - default: @@ -14816,17 +20893,17 @@ index 4f400d19f6f..cb37efb53f7 100644 + + if (ret < 0) + { -+ WARN("Failed to create shader parser.\n"); ++ WARN("Failed to parse shader.\n"); + } + else + { -+ ret = vkd3d_shader_parser_compile(parser, compile_info, out, &message_context); -+ vkd3d_shader_parser_destroy(parser); ++ ret = vsir_program_compile(&program, config_flags, compile_info, out, &message_context); ++ vsir_program_cleanup(&program); + } } vkd3d_shader_message_context_trace_messages(&message_context); -@@ -1937,13 +1872,18 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1937,13 +1870,18 @@ const enum vkd3d_shader_target_type 
*vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, @@ -14846,7 +20923,7 @@ index 4f400d19f6f..cb37efb53f7 100644 VKD3D_SHADER_TARGET_D3D_BYTECODE, VKD3D_SHADER_TARGET_DXBC_TPF, VKD3D_SHADER_TARGET_FX, -@@ -1958,13 +1898,21 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1958,13 +1896,21 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_D3D_ASM, }; @@ -14871,7 +20948,7 @@ index 4f400d19f6f..cb37efb53f7 100644 case VKD3D_SHADER_SOURCE_DXBC_TPF: *count = ARRAY_SIZE(dxbc_tpf_types); return dxbc_tpf_types; -@@ -1977,6 +1925,12 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1977,6 +1923,12 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( *count = ARRAY_SIZE(d3dbc_types); return d3dbc_types; @@ -14884,8 +20961,17 @@ index 4f400d19f6f..cb37efb53f7 100644 default: *count = 0; return NULL; +@@ -2050,7 +2002,7 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, + { + void *params; + +- if (count > allocator->count - allocator->index) ++ if (!allocator->current || count > allocator->count - allocator->index) + { + struct vkd3d_shader_param_node *next; + diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 2d3b3254638..1f4320968d3 100644 +index 2d3b3254638..29b8d6ad022 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -100,6 +100,7 @@ enum vkd3d_shader_error @@ -14905,15 +20991,24 @@ index 2d3b3254638..1f4320968d3 100644 VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, -@@ -218,6 +221,7 @@ enum vkd3d_shader_error +@@ -199,6 +202,7 @@ enum vkd3d_shader_error + VKD3D_SHADER_WARNING_DXIL_INVALID_MASK = 8307, + 
VKD3D_SHADER_WARNING_DXIL_INVALID_OPERATION = 8308, + VKD3D_SHADER_WARNING_DXIL_IGNORING_ATTACHMENT = 8309, ++ VKD3D_SHADER_WARNING_DXIL_UNDEFINED_OPERAND = 8310, + + VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, + VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER = 9001, +@@ -218,6 +222,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX = 9015, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW = 9016, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, + VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION = 9018, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_GS = 9019, VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, }; -@@ -445,6 +449,7 @@ enum vkd3d_shader_opcode +@@ -445,6 +451,7 @@ enum vkd3d_shader_opcode VKD3DSIH_NOT, VKD3DSIH_NRM, VKD3DSIH_OR, @@ -14921,7 +21016,7 @@ index 2d3b3254638..1f4320968d3 100644 VKD3DSIH_PHASE, VKD3DSIH_PHI, VKD3DSIH_POW, -@@ -516,6 +521,7 @@ enum vkd3d_shader_opcode +@@ -516,10 +523,31 @@ enum vkd3d_shader_opcode VKD3DSIH_UMAX, VKD3DSIH_UMIN, VKD3DSIH_UMUL, @@ -14929,7 +21024,40 @@ index 2d3b3254638..1f4320968d3 100644 VKD3DSIH_USHR, VKD3DSIH_UTOD, VKD3DSIH_UTOF, -@@ -620,14 +626,16 @@ enum vkd3d_data_type + VKD3DSIH_UTOU, ++ VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL, ++ VKD3DSIH_WAVE_ACTIVE_BALLOT, ++ VKD3DSIH_WAVE_ACTIVE_BIT_AND, ++ VKD3DSIH_WAVE_ACTIVE_BIT_OR, ++ VKD3DSIH_WAVE_ACTIVE_BIT_XOR, ++ VKD3DSIH_WAVE_ALL_BIT_COUNT, ++ VKD3DSIH_WAVE_ALL_TRUE, ++ VKD3DSIH_WAVE_ANY_TRUE, ++ VKD3DSIH_WAVE_IS_FIRST_LANE, ++ VKD3DSIH_WAVE_OP_ADD, ++ VKD3DSIH_WAVE_OP_IMAX, ++ VKD3DSIH_WAVE_OP_IMIN, ++ VKD3DSIH_WAVE_OP_MAX, ++ VKD3DSIH_WAVE_OP_MIN, ++ VKD3DSIH_WAVE_OP_MUL, ++ VKD3DSIH_WAVE_OP_UMAX, ++ VKD3DSIH_WAVE_OP_UMIN, ++ VKD3DSIH_WAVE_PREFIX_BIT_COUNT, ++ VKD3DSIH_WAVE_READ_LANE_AT, ++ VKD3DSIH_WAVE_READ_LANE_FIRST, + VKD3DSIH_XOR, + + VKD3DSIH_INVALID, +@@ -583,6 +611,8 @@ enum vkd3d_shader_register_type + VKD3DSPR_OUTSTENCILREF, + VKD3DSPR_UNDEF, + VKD3DSPR_SSA, ++ VKD3DSPR_WAVELANECOUNT, ++ VKD3DSPR_WAVELANEINDEX, + + VKD3DSPR_COUNT, + +@@ -620,14 
+650,16 @@ enum vkd3d_data_type VKD3D_DATA_UINT8, VKD3D_DATA_UINT64, VKD3D_DATA_BOOL, @@ -14948,7 +21076,7 @@ index 2d3b3254638..1f4320968d3 100644 } static inline bool data_type_is_bool(enum vkd3d_data_type data_type) -@@ -635,6 +643,11 @@ static inline bool data_type_is_bool(enum vkd3d_data_type data_type) +@@ -635,6 +667,11 @@ static inline bool data_type_is_bool(enum vkd3d_data_type data_type) return data_type == VKD3D_DATA_BOOL; } @@ -14960,7 +21088,7 @@ index 2d3b3254638..1f4320968d3 100644 static inline bool data_type_is_64_bit(enum vkd3d_data_type data_type) { return data_type == VKD3D_DATA_DOUBLE || data_type == VKD3D_DATA_UINT64; -@@ -749,11 +762,21 @@ enum vkd3d_shader_uav_flags +@@ -749,11 +786,21 @@ enum vkd3d_shader_uav_flags VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100, }; @@ -14982,7 +21110,15 @@ index 2d3b3254638..1f4320968d3 100644 }; #define VKD3DSI_NONE 0x0 -@@ -808,6 +831,8 @@ enum vkd3d_shader_type +@@ -764,6 +811,7 @@ enum vkd3d_tessellator_domain + #define VKD3DSI_SAMPLE_INFO_UINT 0x1 + #define VKD3DSI_SAMPLER_COMPARISON_MODE 0x1 + #define VKD3DSI_SHIFT_UNMASKED 0x1 ++#define VKD3DSI_WAVE_PREFIX 0x1 + + #define VKD3DSI_PRECISE_X 0x100 + #define VKD3DSI_PRECISE_Y 0x200 +@@ -808,6 +856,8 @@ enum vkd3d_shader_type VKD3D_SHADER_TYPE_COUNT, }; @@ -14991,7 +21127,7 @@ index 2d3b3254638..1f4320968d3 100644 struct vkd3d_shader_version { enum vkd3d_shader_type type; -@@ -1025,7 +1050,7 @@ struct signature_element *vsir_signature_find_element_for_reg(const struct shade +@@ -1025,7 +1075,7 @@ struct signature_element *vsir_signature_find_element_for_reg(const struct shade unsigned int reg_idx, unsigned int write_mask); void shader_signature_cleanup(struct shader_signature *signature); @@ -15000,18 +21136,19 @@ index 2d3b3254638..1f4320968d3 100644 { const uint32_t *byte_code; size_t byte_code_size; -@@ -1033,7 +1058,10 @@ struct vkd3d_shader_desc +@@ -1033,11 +1083,6 @@ struct vkd3d_shader_desc struct shader_signature input_signature; struct 
shader_signature output_signature; struct shader_signature patch_constant_signature; -+}; +- +- struct +- { +- uint32_t used, external; +- } flat_constant_count[3]; + }; -+struct vkd3d_shader_desc -+{ - struct - { - uint32_t used, external; -@@ -1079,14 +1107,18 @@ struct vkd3d_shader_tgsm + struct vkd3d_shader_register_semantic +@@ -1079,14 +1124,18 @@ struct vkd3d_shader_tgsm struct vkd3d_shader_tgsm_raw { struct vkd3d_shader_dst_param reg; @@ -15030,7 +21167,47 @@ index 2d3b3254638..1f4320968d3 100644 }; struct vkd3d_shader_thread_group_size -@@ -1290,6 +1322,10 @@ struct vsir_program +@@ -1121,6 +1170,8 @@ enum vkd3d_primitive_type + VKD3D_PT_TRIANGLELIST_ADJ = 12, + VKD3D_PT_TRIANGLESTRIP_ADJ = 13, + VKD3D_PT_PATCH = 14, ++ ++ VKD3D_PT_COUNT = 15, + }; + + struct vkd3d_shader_primitive_type +@@ -1216,6 +1267,12 @@ static inline bool register_is_scalar_constant_zero(const struct vkd3d_shader_re + && (data_type_is_64_bit(reg->data_type) ? !reg->u.immconst_u64[0] : !reg->u.immconst_u32[0]); + } + ++static inline bool register_is_numeric_array(const struct vkd3d_shader_register *reg) ++{ ++ return (reg->type == VKD3DSPR_IMMCONSTBUFFER || reg->type == VKD3DSPR_IDXTEMP ++ || reg->type == VKD3DSPR_GROUPSHAREDMEM); ++} ++ + static inline bool vsir_register_is_label(const struct vkd3d_shader_register *reg) + { + return reg->type == VKD3DSPR_LABEL; +@@ -1268,6 +1325,8 @@ struct vkd3d_shader_instruction_array + struct vkd3d_shader_immediate_constant_buffer **icbs; + size_t icb_capacity; + size_t icb_count; ++ ++ struct vkd3d_shader_src_param *outpointid_param; + }; + + bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instructions, unsigned int reserve); +@@ -1278,6 +1337,8 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins + struct vkd3d_shader_immediate_constant_buffer *icb); + bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, + unsigned int dst, unsigned int 
src); ++struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( ++ struct vkd3d_shader_instruction_array *instructions); + void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); + + enum vkd3d_shader_config_flags +@@ -1290,7 +1351,12 @@ struct vsir_program struct vkd3d_shader_version shader_version; struct vkd3d_shader_instruction_array instructions; @@ -15039,12 +21216,20 @@ index 2d3b3254638..1f4320968d3 100644 + struct shader_signature patch_constant_signature; + unsigned int input_control_point_count, output_control_point_count; ++ unsigned int flat_constant_count[3]; unsigned int block_count; unsigned int temp_count; -@@ -1302,6 +1338,10 @@ struct vsir_program + unsigned int ssa_count; +@@ -1300,8 +1366,15 @@ struct vsir_program + size_t block_name_count; + }; - bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); +-bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); void vsir_program_cleanup(struct vsir_program *program); ++int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, ++ struct vkd3d_shader_message_context *message_context); ++bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); +enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); +enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, @@ -15052,30 +21237,46 @@ index 2d3b3254638..1f4320968d3 100644 static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( struct vsir_program *program, unsigned int count) -@@ -1333,6 +1373,9 @@ struct 
vkd3d_shader_parser_ops - void (*parser_destroy)(struct vkd3d_shader_parser *parser); +@@ -1319,32 +1392,21 @@ struct vkd3d_shader_parser + { + struct vkd3d_shader_message_context *message_context; + struct vkd3d_shader_location location; ++ struct vsir_program *program; + bool failed; +- +- struct vkd3d_shader_desc shader_desc; +- const struct vkd3d_shader_parser_ops *ops; +- struct vsir_program program; +- +- uint64_t config_flags; +-}; +- +-struct vkd3d_shader_parser_ops +-{ +- void (*parser_destroy)(struct vkd3d_shader_parser *parser); }; -+int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -+ struct vkd3d_shader_message_context *message_context); void vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); - bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, -@@ -1347,6 +1390,12 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse - parser->ops->parser_destroy(parser); +-bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, +- struct vkd3d_shader_message_context *message_context, const char *source_name, +- const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, +- unsigned int instruction_reserve); ++void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context, const char *source_name); + void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_error error, const char *format, ...) 
VKD3D_PRINTF_FUNC(3, 4); + +-static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parser) ++static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser, uint64_t config_flags) + { +- parser->ops->parser_destroy(parser); ++ return vsir_program_validate(parser->program, config_flags, ++ parser->location.source_name, parser->message_context); } -+static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser) -+{ -+ return vsir_program_validate(&parser->program, parser->config_flags, -+ parser->location.source_name, parser->message_context); -+} -+ struct vkd3d_shader_descriptor_info1 - { - enum vkd3d_shader_descriptor_type type; -@@ -1385,21 +1434,22 @@ struct vkd3d_string_buffer_cache +@@ -1385,21 +1447,22 @@ struct vkd3d_string_buffer_cache size_t count, max_count, capacity; }; @@ -15103,7 +21304,7 @@ index 2d3b3254638..1f4320968d3 100644 int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f); int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d); int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) 
VKD3D_PRINTF_FUNC(2, 3); -@@ -1408,6 +1458,7 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *list, struct +@@ -1408,6 +1471,7 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *list, struct vkd3d_string_buffer_trace_(buffer, __FUNCTION__) void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function); int vkd3d_string_buffer_vprintf(struct vkd3d_string_buffer *buffer, const char *format, va_list args); @@ -15111,9 +21312,27 @@ index 2d3b3254638..1f4320968d3 100644 struct vkd3d_bytecode_buffer { -@@ -1483,20 +1534,16 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); +@@ -1472,35 +1536,32 @@ void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const s + enum vkd3d_shader_error error, const char *format, va_list args); + + void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info); ++uint64_t vkd3d_shader_init_config_flags(void); + void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function); + #define vkd3d_shader_trace_text(text, size) \ + vkd3d_shader_trace_text_(text, size, __FUNCTION__) + +-int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); +-int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); +-int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); ++int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, 
uint64_t config_flags, ++ struct vkd3d_shader_message_context *message_context, struct vsir_program *program); ++int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, ++ struct vkd3d_shader_message_context *message_context, struct vsir_program *program); ++int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, ++ struct vkd3d_shader_message_context *message_context, struct vsir_program *program); -void free_shader_desc(struct vkd3d_shader_desc *desc); +void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); @@ -15137,7 +21356,12 @@ index 2d3b3254638..1f4320968d3 100644 #define SPIRV_MAX_SRC_COUNT 6 -@@ -1513,17 +1560,17 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, +-int spirv_compile(struct vkd3d_shader_parser *parser, ++int spirv_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); +@@ -1513,17 +1574,17 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); @@ -15157,7 +21381,7 @@ index 2d3b3254638..1f4320968d3 100644 case VKD3D_DATA_UINT: return VKD3D_SHADER_COMPONENT_UINT; case VKD3D_DATA_INT: -@@ -1585,6 +1632,11 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resourc +@@ -1585,6 +1646,11 @@ static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resourc } } @@ -15169,7 +21393,7 @@ index 2d3b3254638..1f4320968d3 100644 enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index); -@@ -1724,6 
+1776,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ +@@ -1724,6 +1790,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ return compacted_swizzle; } @@ -15201,7 +21425,7 @@ index 2d3b3254638..1f4320968d3 100644 struct vkd3d_struct { enum vkd3d_shader_structure_type type; -@@ -1760,7 +1837,4 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void +@@ -1760,7 +1851,4 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void void dxbc_writer_init(struct dxbc_writer *dxbc); int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); @@ -15275,10 +21499,39 @@ index 00000000000..56ba6990420 + return 0; +} diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 7115a74a6f2..4a69ff530da 100644 +index 7115a74a6f2..95366d3441b 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -5414,6 +5414,26 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 +@@ -2052,20 +2052,15 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, + * state when GPU finishes execution of a command list. 
*/ + if (is_swapchain_image) + { +- if (resource->present_state == D3D12_RESOURCE_STATE_PRESENT) +- { +- *access_mask = VK_ACCESS_MEMORY_READ_BIT; +- *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; +- if (image_layout) +- *image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; +- return true; +- } +- else if (resource->present_state != D3D12_RESOURCE_STATE_COMMON) +- { +- vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, ++ if (resource->present_state != D3D12_RESOURCE_STATE_PRESENT) ++ return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, + resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); +- return true; +- } ++ ++ *access_mask = VK_ACCESS_MEMORY_READ_BIT; ++ *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; ++ if (image_layout) ++ *image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; ++ return true; + } + + *access_mask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT; +@@ -5414,6 +5409,26 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 | ((colour->uint32[2] & 0x3ff) << 22); return vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); @@ -15306,7 +21559,7 @@ index 7115a74a6f2..4a69ff530da 100644 return NULL; } diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 17c7ccb3e31..a394e3f7592 100644 +index 17c7ccb3e31..c8cfea43cc1 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -89,11 +89,13 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = @@ -15323,6 +21576,36 @@ index 17c7ccb3e31..a394e3f7592 100644 VK_EXTENSION(EXT_MUTABLE_DESCRIPTOR_TYPE, EXT_mutable_descriptor_type), VK_EXTENSION(EXT_ROBUSTNESS_2, EXT_robustness2), VK_EXTENSION(EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, EXT_shader_demote_to_helper_invocation), +@@ -270,13 +272,15 @@ static bool has_extension(const VkExtensionProperties *extensions, + + for (i = 0; i < count; ++i) + 
{ +- if (is_extension_disabled(extension_name)) +- { +- WARN("Extension %s is disabled.\n", debugstr_a(extension_name)); +- continue; +- } + if (!strcmp(extensions[i].extensionName, extension_name)) ++ { ++ if (is_extension_disabled(extension_name)) ++ { ++ WARN("Extension %s is disabled.\n", debugstr_a(extension_name)); ++ return false; ++ } + return true; ++ } + } + return false; + } +@@ -420,8 +424,6 @@ static HRESULT vkd3d_init_instance_caps(struct vkd3d_instance *instance, + ERR("Failed to enumerate instance extensions, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } +- if (!count) +- return S_OK; + + if (!(vk_extensions = vkd3d_calloc(count, sizeof(*vk_extensions)))) + return E_OUTOFMEMORY; @@ -557,12 +559,14 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, const struct vkd3d_optional_instance_extensions_info *optional_extensions; const struct vkd3d_application_info *vkd3d_application_info; @@ -15355,7 +21638,27 @@ index 17c7ccb3e31..a394e3f7592 100644 if ((vkd3d_application_info = vkd3d_find_struct(create_info->next, APPLICATION_INFO))) { if (vkd3d_application_info->application_name) -@@ -789,6 +803,7 @@ struct vkd3d_physical_device_info +@@ -774,6 +788,11 @@ VkInstance vkd3d_instance_get_vk_instance(struct vkd3d_instance *instance) + return instance->vk_instance; + } + ++static bool d3d12_device_environment_is_vulkan_min_1_1(struct d3d12_device *device) ++{ ++ return device->environment == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1; ++} ++ + struct vkd3d_physical_device_info + { + /* properties */ +@@ -782,6 +801,7 @@ struct vkd3d_physical_device_info + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; + VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties; + VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties; ++ VkPhysicalDeviceSubgroupProperties subgroup_properties; + + VkPhysicalDeviceProperties2KHR properties2; + +@@ -789,6 +809,7 @@ struct 
vkd3d_physical_device_info VkPhysicalDeviceConditionalRenderingFeaturesEXT conditional_rendering_features; VkPhysicalDeviceDepthClipEnableFeaturesEXT depth_clip_features; VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features; @@ -15363,7 +21666,7 @@ index 17c7ccb3e31..a394e3f7592 100644 VkPhysicalDeviceRobustness2FeaturesEXT robustness2_features; VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote_features; VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features; -@@ -796,6 +811,7 @@ struct vkd3d_physical_device_info +@@ -796,6 +817,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features; VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features; VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT mutable_features; @@ -15371,7 +21674,7 @@ index 17c7ccb3e31..a394e3f7592 100644 VkPhysicalDeviceFeatures2 features2; }; -@@ -808,6 +824,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i +@@ -808,6 +830,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; @@ -15379,15 +21682,16 @@ index 17c7ccb3e31..a394e3f7592 100644 VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; -@@ -818,6 +835,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i +@@ -818,13 +841,16 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; VkPhysicalDeviceTransformFeedbackPropertiesEXT 
*xfb_properties; VkPhysicalDevice physical_device = device->vk_physical_device; + VkPhysicalDevice4444FormatsFeaturesEXT *formats4444_features; VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; ++ VkPhysicalDeviceSubgroupProperties *subgroup_properties; -@@ -825,6 +843,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + memset(info, 0, sizeof(*info)); conditional_rendering_features = &info->conditional_rendering_features; depth_clip_features = &info->depth_clip_features; descriptor_indexing_features = &info->descriptor_indexing_features; @@ -15395,33 +21699,96 @@ index 17c7ccb3e31..a394e3f7592 100644 robustness2_features = &info->robustness2_features; descriptor_indexing_properties = &info->descriptor_indexing_properties; maintenance3_properties = &info->maintenance3_properties; -@@ -835,6 +854,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i +@@ -835,31 +861,49 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vertex_divisor_properties = &info->vertex_divisor_properties; timeline_semaphore_features = &info->timeline_semaphore_features; mutable_features = &info->mutable_features; + formats4444_features = &info->formats4444_features; xfb_features = &info->xfb_features; xfb_properties = &info->xfb_properties; ++ subgroup_properties = &info->subgroup_properties; -@@ -846,6 +866,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - vk_prepend_struct(&info->features2, depth_clip_features); + info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + + conditional_rendering_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; +- vk_prepend_struct(&info->features2, conditional_rendering_features); ++ if (vulkan_info->EXT_conditional_rendering) ++ vk_prepend_struct(&info->features2, conditional_rendering_features); + 
depth_clip_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; +- vk_prepend_struct(&info->features2, depth_clip_features); ++ if (vulkan_info->EXT_depth_clip_enable) ++ vk_prepend_struct(&info->features2, depth_clip_features); descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; - vk_prepend_struct(&info->features2, descriptor_indexing_features); +- vk_prepend_struct(&info->features2, descriptor_indexing_features); ++ if (vulkan_info->EXT_descriptor_indexing) ++ vk_prepend_struct(&info->features2, descriptor_indexing_features); + fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, fragment_shader_interlock_features); ++ if (vulkan_info->EXT_fragment_shader_interlock) ++ vk_prepend_struct(&info->features2, fragment_shader_interlock_features); robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - vk_prepend_struct(&info->features2, robustness2_features); +- vk_prepend_struct(&info->features2, robustness2_features); ++ if (vulkan_info->EXT_robustness2) ++ vk_prepend_struct(&info->features2, robustness2_features); demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; -@@ -860,6 +882,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - vk_prepend_struct(&info->features2, timeline_semaphore_features); +- vk_prepend_struct(&info->features2, demote_features); ++ if (vulkan_info->EXT_shader_demote_to_helper_invocation) ++ vk_prepend_struct(&info->features2, demote_features); + buffer_alignment_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; +- vk_prepend_struct(&info->features2, buffer_alignment_features); ++ if (vulkan_info->EXT_texel_buffer_alignment) ++ vk_prepend_struct(&info->features2, buffer_alignment_features); + 
xfb_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; +- vk_prepend_struct(&info->features2, xfb_features); ++ if (vulkan_info->EXT_transform_feedback) ++ vk_prepend_struct(&info->features2, xfb_features); + vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; +- vk_prepend_struct(&info->features2, vertex_divisor_features); ++ if (vulkan_info->EXT_vertex_attribute_divisor) ++ vk_prepend_struct(&info->features2, vertex_divisor_features); + timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; +- vk_prepend_struct(&info->features2, timeline_semaphore_features); ++ if (vulkan_info->KHR_timeline_semaphore) ++ vk_prepend_struct(&info->features2, timeline_semaphore_features); mutable_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; - vk_prepend_struct(&info->features2, mutable_features); +- vk_prepend_struct(&info->features2, mutable_features); ++ if (vulkan_info->EXT_mutable_descriptor_type) ++ vk_prepend_struct(&info->features2, mutable_features); + formats4444_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, formats4444_features); ++ if (vulkan_info->EXT_4444_formats) ++ vk_prepend_struct(&info->features2, formats4444_features); if (vulkan_info->KHR_get_physical_device_properties2) VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); -@@ -1158,6 +1182,7 @@ static void vkd3d_trace_physical_device_limits(const struct vkd3d_physical_devic +@@ -869,15 +913,23 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + + maintenance3_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; +- vk_prepend_struct(&info->properties2, maintenance3_properties); ++ if 
(vulkan_info->KHR_maintenance3) ++ vk_prepend_struct(&info->properties2, maintenance3_properties); + descriptor_indexing_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; +- vk_prepend_struct(&info->properties2, descriptor_indexing_properties); ++ if (vulkan_info->EXT_descriptor_indexing) ++ vk_prepend_struct(&info->properties2, descriptor_indexing_properties); + buffer_alignment_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; +- vk_prepend_struct(&info->properties2, buffer_alignment_properties); ++ if (vulkan_info->EXT_texel_buffer_alignment) ++ vk_prepend_struct(&info->properties2, buffer_alignment_properties); + xfb_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; +- vk_prepend_struct(&info->properties2, xfb_properties); ++ if (vulkan_info->EXT_transform_feedback) ++ vk_prepend_struct(&info->properties2, xfb_properties); + vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; +- vk_prepend_struct(&info->properties2, vertex_divisor_properties); ++ if (vulkan_info->EXT_vertex_attribute_divisor) ++ vk_prepend_struct(&info->properties2, vertex_divisor_properties); ++ subgroup_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; ++ if (d3d12_device_environment_is_vulkan_min_1_1(device)) ++ vk_prepend_struct(&info->properties2, subgroup_properties); + + if (vulkan_info->KHR_get_physical_device_properties2) + VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); +@@ -1158,6 +1210,7 @@ static void vkd3d_trace_physical_device_limits(const struct vkd3d_physical_devic static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_device_info *info) { @@ -15429,7 +21796,7 @@ index 17c7ccb3e31..a394e3f7592 100644 const VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; const 
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; const VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; -@@ -1279,6 +1304,15 @@ static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_dev +@@ -1279,6 +1332,15 @@ static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_dev TRACE(" VkPhysicalDeviceDepthClipEnableFeaturesEXT:\n"); TRACE(" depthClipEnable: %#x.\n", depth_clip_features->depthClipEnable); @@ -15445,15 +21812,104 @@ index 17c7ccb3e31..a394e3f7592 100644 demote_features = &info->demote_features; TRACE(" VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT:\n"); TRACE(" shaderDemoteToHelperInvocation: %#x.\n", demote_features->shaderDemoteToHelperInvocation); -@@ -1476,6 +1510,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, +@@ -1470,22 +1532,92 @@ static bool d3d12_device_supports_typed_uav_load_additional_formats(const struct + return true; + } + ++static HRESULT vkd3d_check_device_extensions(struct d3d12_device *device, ++ const struct vkd3d_device_create_info *create_info, VkExtensionProperties **vk_extensions, ++ uint32_t *vk_extension_count, uint32_t *device_extension_count, bool **user_extension_supported) ++{ ++ const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; ++ const struct vkd3d_optional_device_extensions_info *optional_extensions; ++ VkPhysicalDevice physical_device = device->vk_physical_device; ++ struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; ++ VkResult vr; ++ ++ *device_extension_count = 0; ++ ++ if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, vk_extension_count, NULL))) < 0) ++ { ++ ERR("Failed to enumerate device extensions, vr %d.\n", vr); ++ return hresult_from_vk_result(vr); ++ } ++ ++ if (!(*vk_extensions = vkd3d_calloc(*vk_extension_count, sizeof(**vk_extensions)))) ++ return E_OUTOFMEMORY; ++ ++ TRACE("Enumerating %u device 
extensions.\n", *vk_extension_count); ++ if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, vk_extension_count, *vk_extensions))) < 0) ++ { ++ ERR("Failed to enumerate device extensions, vr %d.\n", vr); ++ vkd3d_free(*vk_extensions); ++ return hresult_from_vk_result(vr); ++ } ++ ++ optional_extensions = vkd3d_find_struct(create_info->next, OPTIONAL_DEVICE_EXTENSIONS_INFO); ++ if (optional_extensions && optional_extensions->extension_count) ++ { ++ if (!(*user_extension_supported = vkd3d_calloc(optional_extensions->extension_count, sizeof(bool)))) ++ { ++ vkd3d_free(*vk_extensions); ++ return E_OUTOFMEMORY; ++ } ++ } ++ else ++ { ++ *user_extension_supported = NULL; ++ } ++ ++ *device_extension_count = vkd3d_check_extensions(*vk_extensions, *vk_extension_count, ++ required_device_extensions, ARRAY_SIZE(required_device_extensions), ++ optional_device_extensions, ARRAY_SIZE(optional_device_extensions), ++ create_info->device_extensions, create_info->device_extension_count, ++ optional_extensions ? optional_extensions->extensions : NULL, ++ optional_extensions ? 
optional_extensions->extension_count : 0, ++ *user_extension_supported, vulkan_info, "device", ++ device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); ++ ++ return S_OK; ++} ++ + static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + const struct vkd3d_device_create_info *create_info, + struct vkd3d_physical_device_info *physical_device_info, uint32_t *device_extension_count, bool **user_extension_supported) { ++ const VkPhysicalDeviceSubgroupProperties *subgroup_properties = &physical_device_info->subgroup_properties; const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; +- const struct vkd3d_optional_device_extensions_info *optional_extensions; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock; - const struct vkd3d_optional_device_extensions_info *optional_extensions; VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; VkPhysicalDevice physical_device = device->vk_physical_device; -@@ -1539,8 +1574,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; + VkExtensionProperties *vk_extensions; + VkPhysicalDeviceFeatures *features; +- uint32_t count; +- VkResult vr; ++ uint32_t vk_extension_count; ++ HRESULT hr; + +- *device_extension_count = 0; ++ /* SHUFFLE is required to implement WaveReadLaneAt with dynamically uniform index before SPIR-V 1.5 / Vulkan 1.2. 
*/ ++ static const VkSubgroupFeatureFlags required_subgroup_features = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT ++ | VK_SUBGROUP_FEATURE_BASIC_BIT ++ | VK_SUBGROUP_FEATURE_BALLOT_BIT ++ | VK_SUBGROUP_FEATURE_SHUFFLE_BIT ++ | VK_SUBGROUP_FEATURE_QUAD_BIT ++ | VK_SUBGROUP_FEATURE_VOTE_BIT; ++ ++ static const VkSubgroupFeatureFlags required_stages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; ++ ++ if (FAILED(hr = vkd3d_check_device_extensions(device, create_info, &vk_extensions, &vk_extension_count, ++ device_extension_count, user_extension_supported))) ++ return hr; ++ ++ vkd3d_physical_device_info_init(physical_device_info, device); + + vkd3d_trace_physical_device(physical_device, physical_device_info, vk_procs); + vkd3d_trace_physical_device_features(physical_device_info); +@@ -1539,8 +1671,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageReadWithoutFormat && d3d12_device_supports_typed_uav_load_additional_formats(device); @@ -15462,20 +21918,77 @@ index 17c7ccb3e31..a394e3f7592 100644 /* GL_INTEL_conservative_rasterization, no Vulkan equivalent. */ device->feature_options.ConservativeRasterizationTier = D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED; device->feature_options.MaxGPUVirtualAddressBitsPerResource = 40; /* FIXME */ -@@ -1619,6 +1652,12 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - *user_extension_supported, vulkan_info, "device", - device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); +@@ -1550,10 +1680,12 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + device->feature_options.ResourceHeapTier = D3D12_RESOURCE_HEAP_TIER_2; + /* Shader Model 6 support. 
*/ +- device->feature_options1.WaveOps = FALSE; +- device->feature_options1.WaveLaneCountMin = 0; +- device->feature_options1.WaveLaneCountMax = 0; +- device->feature_options1.TotalLaneCount = 0; ++ device->feature_options1.WaveOps = subgroup_properties->subgroupSize >= 4 ++ && (subgroup_properties->supportedOperations & required_subgroup_features) == required_subgroup_features ++ && (subgroup_properties->supportedStages & required_stages) == required_stages; ++ device->feature_options1.WaveLaneCountMin = subgroup_properties->subgroupSize; ++ device->feature_options1.WaveLaneCountMax = subgroup_properties->subgroupSize; ++ device->feature_options1.TotalLaneCount = 32 * subgroup_properties->subgroupSize; /* approx. */ + device->feature_options1.ExpandedComputeResourceStates = TRUE; + device->feature_options1.Int64ShaderOps = features->shaderInt64; + +@@ -1577,47 +1709,11 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + device->feature_options5.RenderPassesTier = D3D12_RENDER_PASS_TIER_0; + device->feature_options5.RaytracingTier = D3D12_RAYTRACING_TIER_NOT_SUPPORTED; + +- if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, NULL))) < 0) +- { +- ERR("Failed to enumerate device extensions, vr %d.\n", vr); +- return hresult_from_vk_result(vr); +- } +- if (!count) +- return S_OK; +- +- if (!(vk_extensions = vkd3d_calloc(count, sizeof(*vk_extensions)))) +- return E_OUTOFMEMORY; +- +- TRACE("Enumerating %u device extensions.\n", count); +- if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, vk_extensions))) < 0) +- { +- ERR("Failed to enumerate device extensions, vr %d.\n", vr); +- vkd3d_free(vk_extensions); +- return hresult_from_vk_result(vr); +- } +- +- optional_extensions = vkd3d_find_struct(create_info->next, OPTIONAL_DEVICE_EXTENSIONS_INFO); +- if (optional_extensions && optional_extensions->extension_count) +- { +- if (!(*user_extension_supported = 
vkd3d_calloc(optional_extensions->extension_count, sizeof(bool)))) +- { +- vkd3d_free(vk_extensions); +- return E_OUTOFMEMORY; +- } +- } +- else +- { +- *user_extension_supported = NULL; +- } +- +- *device_extension_count = vkd3d_check_extensions(vk_extensions, count, +- required_device_extensions, ARRAY_SIZE(required_device_extensions), +- optional_device_extensions, ARRAY_SIZE(optional_device_extensions), +- create_info->device_extensions, create_info->device_extension_count, +- optional_extensions ? optional_extensions->extensions : NULL, +- optional_extensions ? optional_extensions->extension_count : 0, +- *user_extension_supported, vulkan_info, "device", +- device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); + fragment_shader_interlock = &physical_device_info->fragment_shader_interlock_features; + if (!fragment_shader_interlock->fragmentShaderSampleInterlock + || !fragment_shader_interlock->fragmentShaderPixelInterlock) + vulkan_info->EXT_fragment_shader_interlock = false; + device->feature_options.ROVsSupported = vulkan_info->EXT_fragment_shader_interlock; -+ + if (!physical_device_info->conditional_rendering_features.conditionalRendering) vulkan_info->EXT_conditional_rendering = false; - if (!physical_device_info->depth_clip_features.depthClipEnable) -@@ -1634,6 +1673,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, +@@ -1634,9 +1730,11 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, if (!physical_device_info->timeline_semaphore_features.timelineSemaphore) vulkan_info->KHR_timeline_semaphore = false; @@ -15483,8 +21996,12 @@ index 17c7ccb3e31..a394e3f7592 100644 + vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties; - if (get_spec_version(vk_extensions, count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3) -@@ -1675,6 +1716,10 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, +- if (get_spec_version(vk_extensions, 
count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3) ++ if (get_spec_version(vk_extensions, vk_extension_count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3) + { + const VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *divisor_features; + divisor_features = &physical_device_info->vertex_divisor_features; +@@ -1675,6 +1773,10 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] = VKD3D_SHADER_SPIRV_EXTENSION_EXT_DESCRIPTOR_INDEXING; @@ -15495,7 +22012,16 @@ index 17c7ccb3e31..a394e3f7592 100644 if (vulkan_info->EXT_shader_stencil_export) vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] = VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT; -@@ -2498,18 +2543,286 @@ static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cach +@@ -2029,8 +2131,6 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, + + VK_CALL(vkGetPhysicalDeviceMemoryProperties(physical_device, &device->memory_properties)); + +- vkd3d_physical_device_info_init(&physical_device_info, device); +- + if (FAILED(hr = vkd3d_init_device_caps(device, create_info, &physical_device_info, + &extension_count, &user_extension_supported))) + return hr; +@@ -2498,18 +2598,286 @@ static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cach } } @@ -15786,7 +22312,7 @@ index 17c7ccb3e31..a394e3f7592 100644 || IsEqualGUID(riid, &IID_ID3D12Device6) || IsEqualGUID(riid, &IID_ID3D12Device5) || IsEqualGUID(riid, &IID_ID3D12Device4) -@@ -2531,9 +2844,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *ifac +@@ -2531,9 +2899,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *ifac return E_NOINTERFACE; } @@ -15798,7 +22324,7 @@ index 17c7ccb3e31..a394e3f7592 100644 unsigned int refcount = vkd3d_atomic_increment_u32(&device->refcount); TRACE("%p increasing refcount to %u.\n", device, 
refcount); -@@ -2563,9 +2876,9 @@ static HRESULT device_worker_stop(struct d3d12_device *device) +@@ -2563,9 +2931,9 @@ static HRESULT device_worker_stop(struct d3d12_device *device) return S_OK; } @@ -15810,7 +22336,7 @@ index 17c7ccb3e31..a394e3f7592 100644 unsigned int refcount = vkd3d_atomic_decrement_u32(&device->refcount); TRACE("%p decreasing refcount to %u.\n", device, refcount); -@@ -2602,10 +2915,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) +@@ -2602,10 +2970,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) return refcount; } @@ -15823,7 +22349,7 @@ index 17c7ccb3e31..a394e3f7592 100644 TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); -@@ -2613,10 +2926,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *ifac +@@ -2613,10 +2981,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *ifac return vkd3d_get_private_data(&device->private_store, guid, data_size, data); } @@ -15836,7 +22362,7 @@ index 17c7ccb3e31..a394e3f7592 100644 TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); -@@ -2624,19 +2937,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *ifac +@@ -2624,19 +2992,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *ifac return vkd3d_set_private_data(&device->private_store, guid, data_size, data); } @@ -15860,7 +22386,7 @@ index 17c7ccb3e31..a394e3f7592 100644 TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size)); -@@ -2644,17 +2957,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, cons +@@ -2644,17 +3012,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, cons VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); } @@ -15881,7 +22407,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct 
d3d12_command_queue *object; HRESULT hr; -@@ -2668,10 +2981,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 * +@@ -2668,10 +3036,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 * riid, command_queue); } @@ -15894,7 +22420,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_command_allocator *object; HRESULT hr; -@@ -2685,10 +2998,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic +@@ -2685,10 +3053,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic riid, command_allocator); } @@ -15907,7 +22433,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_pipeline_state *object; HRESULT hr; -@@ -2702,10 +3015,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 +@@ -2702,10 +3070,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 &IID_ID3D12PipelineState, riid, pipeline_state); } @@ -15920,7 +22446,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_pipeline_state *object; HRESULT hr; -@@ -2719,11 +3032,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D +@@ -2719,11 +3087,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D &IID_ID3D12PipelineState, riid, pipeline_state); } @@ -15934,7 +22460,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_command_list *object; HRESULT hr; -@@ -2846,10 +3159,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) +@@ -2846,10 +3214,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) return true; } @@ -15947,7 +22473,121 @@ index 17c7ccb3e31..a394e3f7592 100644 TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", iface, feature, feature_data, feature_data_size); -@@ -3521,10 +3834,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 +@@ -3095,9 
+3463,21 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 + return E_INVALIDARG; + } + ++ if (data->HighestShaderModel != D3D_SHADER_MODEL_5_1 ++ && (data->HighestShaderModel < D3D_SHADER_MODEL_6_0 ++ || data->HighestShaderModel > D3D_HIGHEST_SHADER_MODEL)) ++ { ++ WARN("Unknown shader model %#x.\n", data->HighestShaderModel); ++ return E_INVALIDARG; ++ } ++ + TRACE("Request shader model %#x.\n", data->HighestShaderModel); + +- data->HighestShaderModel = D3D_SHADER_MODEL_5_1; ++#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL ++ data->HighestShaderModel = min(data->HighestShaderModel, D3D_SHADER_MODEL_6_0); ++#else ++ data->HighestShaderModel = min(data->HighestShaderModel, D3D_SHADER_MODEL_5_1); ++#endif + + TRACE("Shader model %#x.\n", data->HighestShaderModel); + return S_OK; +@@ -3515,16 +3895,101 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 + return S_OK; + } + ++ case D3D12_FEATURE_D3D12_OPTIONS14: ++ { ++ D3D12_FEATURE_DATA_D3D12_OPTIONS14 *data = feature_data; ++ ++ if (feature_data_size != sizeof(*data)) ++ { ++ WARN("Invalid size %u.\n", feature_data_size); ++ } ++ ++ data->AdvancedTextureOpsSupported = FALSE; ++ data->WriteableMSAATexturesSupported = FALSE; ++ data->IndependentFrontAndBackStencilRefMaskSupported = FALSE; ++ ++ TRACE("Advanced texture ops %#x.\n", data->AdvancedTextureOpsSupported); ++ TRACE("Writeable MSAA textures %#x.\n", data->WriteableMSAATexturesSupported); ++ TRACE("Independent front and back stencil ref mask %#x.\n", data->IndependentFrontAndBackStencilRefMaskSupported); ++ return S_OK; ++ } ++ ++ case D3D12_FEATURE_D3D12_OPTIONS15: ++ { ++ D3D12_FEATURE_DATA_D3D12_OPTIONS15 *data = feature_data; ++ ++ if (feature_data_size != sizeof(*data)) ++ { ++ WARN("Invalid size %u.\n", feature_data_size); ++ } ++ ++ data->TriangleFanSupported = FALSE; ++ data->DynamicIndexBufferStripCutSupported = FALSE; ++ ++ TRACE("Triangle fan %#x.\n", data->TriangleFanSupported); ++ 
TRACE("Dynamic index buffer strip cut %#x.\n", data->DynamicIndexBufferStripCutSupported); ++ return S_OK; ++ } ++ ++ case D3D12_FEATURE_D3D12_OPTIONS16: ++ { ++ D3D12_FEATURE_DATA_D3D12_OPTIONS16 *data = feature_data; ++ ++ if (feature_data_size != sizeof(*data)) ++ { ++ WARN("Invalid size %u.\n", feature_data_size); ++ } ++ ++ data->DynamicDepthBiasSupported = FALSE; ++ data->GPUUploadHeapSupported = FALSE; ++ ++ TRACE("Dynamic depth bias %#x.\n", data->DynamicDepthBiasSupported); ++ TRACE("GPU upload heap %#x.\n", data->GPUUploadHeapSupported); ++ return S_OK; ++ } ++ ++ case D3D12_FEATURE_D3D12_OPTIONS17: ++ { ++ D3D12_FEATURE_DATA_D3D12_OPTIONS17 *data = feature_data; ++ ++ if (feature_data_size != sizeof(*data)) ++ { ++ WARN("Invalid size %u.\n", feature_data_size); ++ } ++ ++ data->NonNormalizedCoordinateSamplersSupported = FALSE; ++ data->ManualWriteTrackingResourceSupported = FALSE; ++ ++ TRACE("Non-normalized coordinate samplers %#x.\n", data->NonNormalizedCoordinateSamplersSupported); ++ TRACE("Manual write tracking resource %#x.\n", data->ManualWriteTrackingResourceSupported); ++ return S_OK; ++ } ++ ++ case D3D12_FEATURE_D3D12_OPTIONS18: ++ { ++ D3D12_FEATURE_DATA_D3D12_OPTIONS18 *data = feature_data; ++ ++ if (feature_data_size != sizeof(*data)) ++ { ++ WARN("Invalid size %u.\n", feature_data_size); ++ } ++ ++ data->RenderPassesValid = FALSE; ++ ++ TRACE("Render passes valid %#x.\n", data->RenderPassesValid); ++ return S_OK; ++ } ++ + default: + FIXME("Unhandled feature %#x.\n", feature); + return E_NOTIMPL; } } @@ -15960,7 +22600,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_descriptor_heap *object; HRESULT hr; -@@ -3538,7 +3851,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device7 +@@ -3538,7 +4003,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device7 &IID_ID3D12DescriptorHeap, riid, descriptor_heap); } @@ -15969,7 +22609,7 @@ index 17c7ccb3e31..a394e3f7592 100644 
D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) { TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); -@@ -3561,11 +3874,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D +@@ -3561,11 +4026,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D } } @@ -15983,7 +22623,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_root_signature *object; HRESULT hr; -@@ -3581,10 +3894,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 +@@ -3581,10 +4046,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 &IID_ID3D12RootSignature, riid, root_signature); } @@ -15996,7 +22636,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_desc tmp = {0}; TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); -@@ -3593,11 +3906,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device +@@ -3593,11 +4058,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } @@ -16010,7 +22650,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_desc tmp = {0}; TRACE("iface %p, resource %p, desc %p, descriptor %s.\n", -@@ -3607,11 +3920,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device +@@ -3607,11 +4072,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } @@ -16024,7 +22664,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_desc tmp = {0}; TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %s.\n", -@@ -3622,7 +3935,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic +@@ -3622,7 +4087,7 @@ static void STDMETHODCALLTYPE 
d3d12_device_CreateUnorderedAccessView(ID3D12Devic d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } @@ -16033,7 +22673,7 @@ index 17c7ccb3e31..a394e3f7592 100644 ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { -@@ -3630,10 +3943,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 +@@ -3630,10 +4095,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 iface, resource, desc, debug_cpu_handle(descriptor)); d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), @@ -16046,7 +22686,7 @@ index 17c7ccb3e31..a394e3f7592 100644 ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { -@@ -3641,13 +3954,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 +@@ -3641,13 +4106,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 iface, resource, desc, debug_cpu_handle(descriptor)); d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), @@ -16063,7 +22703,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_desc tmp = {0}; TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); -@@ -3656,14 +3969,14 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, +@@ -3656,14 +4121,14 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } @@ -16080,7 +22720,7 @@ index 17c7ccb3e31..a394e3f7592 100644 unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; unsigned int dst_range_size, src_range_size; struct d3d12_descriptor_heap *dst_heap; -@@ -3719,7 +4032,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device7 *iface, +@@ -3719,7 +4184,7 @@ static void STDMETHODCALLTYPE 
d3d12_device_CopyDescriptors(ID3D12Device7 *iface, } } @@ -16089,7 +22729,7 @@ index 17c7ccb3e31..a394e3f7592 100644 UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) -@@ -3850,10 +4163,10 @@ static void d3d12_device_get_resource_allocation_info(struct d3d12_device *devic +@@ -3850,10 +4315,10 @@ static void d3d12_device_get_resource_allocation_info(struct d3d12_device *devic } static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( @@ -16102,7 +22742,7 @@ index 17c7ccb3e31..a394e3f7592 100644 TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p.\n", iface, info, visible_mask, count, resource_descs); -@@ -3865,10 +4178,10 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour +@@ -3865,10 +4330,10 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour return info; } @@ -16115,7 +22755,7 @@ index 17c7ccb3e31..a394e3f7592 100644 bool coherent; TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", -@@ -3908,12 +4221,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope +@@ -3908,12 +4373,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope return heap_properties; } @@ -16130,7 +22770,7 @@ index 17c7ccb3e31..a394e3f7592 100644 D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_resource *object; HRESULT hr; -@@ -3935,10 +4248,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi +@@ -3935,10 +4400,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } @@ -16143,7 +22783,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_heap *object; HRESULT hr; -@@ -3954,12 
+4267,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, +@@ -3954,12 +4419,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); } @@ -16158,7 +22798,7 @@ index 17c7ccb3e31..a394e3f7592 100644 D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_heap *heap_object; struct d3d12_resource *object; -@@ -3980,11 +4293,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device7 +@@ -3980,11 +4445,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device7 return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } @@ -16172,7 +22812,7 @@ index 17c7ccb3e31..a394e3f7592 100644 D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_resource *object; HRESULT hr; -@@ -4001,11 +4314,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic +@@ -4001,11 +4466,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } @@ -16186,7 +22826,7 @@ index 17c7ccb3e31..a394e3f7592 100644 FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", iface, object, attributes, (uint32_t)access, debugstr_w(name, device->wchar_size), handle); -@@ -4013,7 +4326,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 * +@@ -4013,7 +4478,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 * return E_NOTIMPL; } @@ -16195,7 +22835,7 @@ index 17c7ccb3e31..a394e3f7592 100644 HANDLE handle, REFIID riid, void **object) { FIXME("iface %p, handle %p, riid %s, object %p stub!\n", -@@ -4022,10 +4335,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device7 *if +@@ -4022,10 +4487,10 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_OpenSharedHandle(ID3D12Device7 *if return E_NOTIMPL; } @@ -16208,7 +22848,7 @@ index 17c7ccb3e31..a394e3f7592 100644 FIXME("iface %p, name %s, access %#x, handle %p stub!\n", iface, debugstr_w(name, device->wchar_size), (uint32_t)access, handle); -@@ -4033,7 +4346,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic +@@ -4033,7 +4498,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic return E_NOTIMPL; } @@ -16217,7 +22857,7 @@ index 17c7ccb3e31..a394e3f7592 100644 UINT object_count, ID3D12Pageable * const *objects) { ID3D12Fence *fence; -@@ -4041,17 +4354,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, +@@ -4041,17 +4506,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, TRACE("iface %p, object_count %u, objects %p.\n", iface, object_count, objects); @@ -16238,7 +22878,7 @@ index 17c7ccb3e31..a394e3f7592 100644 UINT object_count, ID3D12Pageable * const *objects) { FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", -@@ -4060,10 +4373,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 *iface, +@@ -4060,10 +4525,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 *iface, return S_OK; } @@ -16251,7 +22891,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_fence *object; HRESULT hr; -@@ -4076,9 +4389,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, +@@ -4076,9 +4541,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); } @@ -16263,7 +22903,7 @@ index 17c7ccb3e31..a394e3f7592 100644 TRACE("iface %p.\n", iface); -@@ -4163,12 +4476,12 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, +@@ -4163,12 +4628,12 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, 
*total_bytes = total; } @@ -16278,7 +22918,7 @@ index 17c7ccb3e31..a394e3f7592 100644 D3D12_RESOURCE_DESC1 resource_desc; TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " -@@ -4182,10 +4495,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 * +@@ -4182,10 +4647,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 * base_offset, layouts, row_counts, row_sizes, total_bytes); } @@ -16291,7 +22931,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_query_heap *object; HRESULT hr; -@@ -4198,18 +4511,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *ifa +@@ -4198,18 +4663,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *ifa return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); } @@ -16313,7 +22953,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_command_signature *object; HRESULT hr; -@@ -4223,14 +4536,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic +@@ -4223,14 +4688,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic &IID_ID3D12CommandSignature, iid, command_signature); } @@ -16330,7 +22970,7 @@ index 17c7ccb3e31..a394e3f7592 100644 TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " "standard_title_shape %p, sub_resource_tiling_count %p, " -@@ -4243,9 +4556,9 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 *ifac +@@ -4243,9 +4708,9 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 *ifac sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); } @@ -16342,7 +22982,7 @@ index 17c7ccb3e31..a394e3f7592 100644 TRACE("iface %p, luid %p.\n", iface, luid); -@@ -4254,7 +4567,7 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface +@@ -4254,7 +4719,7 @@ 
static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface return luid; } @@ -16351,7 +22991,7 @@ index 17c7ccb3e31..a394e3f7592 100644 const void *blob, SIZE_T blob_size, REFIID iid, void **lib) { FIXME("iface %p, blob %p, blob_size %"PRIuPTR", iid %s, lib %p stub!\n", -@@ -4263,7 +4576,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device +@@ -4263,7 +4728,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device return DXGI_ERROR_UNSUPPORTED; } @@ -16360,7 +23000,7 @@ index 17c7ccb3e31..a394e3f7592 100644 ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) { -@@ -4273,7 +4586,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion( +@@ -4273,7 +4738,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion( return E_NOTIMPL; } @@ -16369,7 +23009,7 @@ index 17c7ccb3e31..a394e3f7592 100644 UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) { FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); -@@ -4281,10 +4594,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 +@@ -4281,10 +4746,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 return S_OK; } @@ -16382,7 +23022,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_pipeline_state *object; HRESULT hr; -@@ -4296,7 +4609,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 +@@ -4296,7 +4761,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 return return_interface(&object->ID3D12PipelineState_iface, &IID_ID3D12PipelineState, iid, pipeline_state); } @@ -16391,7 +23031,7 @@ index 17c7ccb3e31..a394e3f7592 100644 const void *address, REFIID iid, void **heap) { 
FIXME("iface %p, address %p, iid %s, heap %p stub!\n", iface, address, debugstr_guid(iid), heap); -@@ -4304,7 +4617,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12 +@@ -4304,7 +4769,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12 return E_NOTIMPL; } @@ -16400,7 +23040,7 @@ index 17c7ccb3e31..a394e3f7592 100644 HANDLE file_mapping, REFIID iid, void **heap) { FIXME("iface %p, file_mapping %p, iid %s, heap %p stub!\n", iface, file_mapping, debugstr_guid(iid), heap); -@@ -4312,7 +4625,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID +@@ -4312,7 +4777,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID return E_NOTIMPL; } @@ -16409,7 +23049,7 @@ index 17c7ccb3e31..a394e3f7592 100644 D3D12_RESIDENCY_FLAGS flags, UINT num_objects, ID3D12Pageable *const *objects, ID3D12Fence *fence, UINT64 fence_value) { -@@ -4323,7 +4636,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 +@@ -4323,7 +4788,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 return S_OK; } @@ -16418,7 +23058,7 @@ index 17c7ccb3e31..a394e3f7592 100644 UINT node_mask, D3D12_COMMAND_LIST_TYPE type, D3D12_COMMAND_LIST_FLAGS flags, REFIID iid, void **command_list) { -@@ -4333,7 +4646,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 * +@@ -4333,7 +4798,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 * return E_NOTIMPL; } @@ -16427,7 +23067,7 @@ index 17c7ccb3e31..a394e3f7592 100644 const D3D12_PROTECTED_RESOURCE_SESSION_DESC *desc, REFIID iid, void **session) { FIXME("iface %p, desc %p, iid %s, session %p stub!\n", iface, desc, debugstr_guid(iid), session); -@@ -4341,13 +4654,13 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3 +@@ -4341,13 +4806,13 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateProtectedResourceSession(ID3 return E_NOTIMPL; } @@ -16443,7 +23083,7 @@ index 17c7ccb3e31..a394e3f7592 100644 D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_resource *object; HRESULT hr; -@@ -4369,11 +4682,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Dev +@@ -4369,11 +4834,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Dev return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } @@ -16457,7 +23097,7 @@ index 17c7ccb3e31..a394e3f7592 100644 struct d3d12_heap *object; HRESULT hr; -@@ -4389,7 +4702,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, +@@ -4389,7 +4854,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); } @@ -16466,7 +23106,7 @@ index 17c7ccb3e31..a394e3f7592 100644 const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) -@@ -4403,11 +4716,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Devi +@@ -4403,11 +4868,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Devi } static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo1( @@ -16480,7 +23120,7 @@ index 17c7ccb3e31..a394e3f7592 100644 TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", iface, info, visible_mask, count, resource_descs, info1); -@@ -4419,7 +4732,7 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour +@@ -4419,7 +4884,7 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour return info; } @@ -16489,7 +23129,7 @@ index 17c7ccb3e31..a394e3f7592 100644 ID3D12LifetimeOwner 
*owner, REFIID iid, void **tracker) { FIXME("iface %p, owner %p, iid %s, tracker %p stub!\n", iface, owner, debugstr_guid(iid), tracker); -@@ -4427,12 +4740,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device +@@ -4427,12 +4892,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device return E_NOTIMPL; } @@ -16504,7 +23144,7 @@ index 17c7ccb3e31..a394e3f7592 100644 UINT *num_meta_commands, D3D12_META_COMMAND_DESC *command_desc) { FIXME("iface %p, num_meta_commands %p, command_desc %p stub!\n", iface, -@@ -4441,7 +4754,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device +@@ -4441,7 +4906,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device return E_NOTIMPL; } @@ -16513,7 +23153,7 @@ index 17c7ccb3e31..a394e3f7592 100644 REFGUID command_id, D3D12_META_COMMAND_PARAMETER_STAGE stage, UINT *size_in_bytes, UINT *parameter_count, D3D12_META_COMMAND_PARAMETER_DESC *parameter_desc) -@@ -4453,7 +4766,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3 +@@ -4453,7 +4918,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3 return E_NOTIMPL; } @@ -16522,7 +23162,7 @@ index 17c7ccb3e31..a394e3f7592 100644 REFGUID command_id, UINT node_mask, const void *parameters_data, SIZE_T data_size_in_bytes, REFIID iid, void **meta_command) { -@@ -4465,7 +4778,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *i +@@ -4465,7 +4930,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *i return E_NOTIMPL; } @@ -16531,7 +23171,7 @@ index 17c7ccb3e31..a394e3f7592 100644 const D3D12_STATE_OBJECT_DESC *desc, REFIID iid, void **state_object) { FIXME("iface %p, desc %p, iid %s, state_object %p stub!\n", iface, desc, debugstr_guid(iid), state_object); -@@ -4473,14 +4786,14 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateStateObject(ID3D12Device7 *i +@@ -4473,14 +4938,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device7 *i return E_NOTIMPL; } @@ -16548,7 +23188,7 @@ index 17c7ccb3e31..a394e3f7592 100644 D3D12_SERIALIZED_DATA_TYPE data_type, const D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER *identifier) { FIXME("iface %p, data_type %u, identifier %p stub!\n", iface, data_type, identifier); -@@ -4488,7 +4801,7 @@ static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_Ch +@@ -4488,7 +4953,7 @@ static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_Ch return D3D12_DRIVER_MATCHING_IDENTIFIER_UNRECOGNIZED; } @@ -16557,7 +23197,7 @@ index 17c7ccb3e31..a394e3f7592 100644 D3D12_BACKGROUND_PROCESSING_MODE mode, D3D12_MEASUREMENTS_ACTION action, HANDLE event, BOOL *further_measurements_desired) { -@@ -4498,7 +4811,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12 +@@ -4498,7 +4963,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12 return E_NOTIMPL; } @@ -16567,7 +23207,7 @@ index 17c7ccb3e31..a394e3f7592 100644 const D3D12_STATE_OBJECT_DESC *addition, ID3D12StateObject *state_object_to_grow_from, REFIID riid, void **new_state_object) { -@@ -4508,7 +4822,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *if +@@ -4508,7 +4974,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *if return E_NOTIMPL; } @@ -16576,7 +23216,7 @@ index 17c7ccb3e31..a394e3f7592 100644 const D3D12_PROTECTED_RESOURCE_SESSION_DESC1 *desc, REFIID riid, void **session) { FIXME("iface %p, desc %p, riid %s, session %p stub!\n", iface, desc, debugstr_guid(riid), session); -@@ -4516,7 +4830,167 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID +@@ -4516,7 +4982,167 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID return 
E_NOTIMPL; } @@ -16745,7 +23385,7 @@ index 17c7ccb3e31..a394e3f7592 100644 { /* IUnknown methods */ d3d12_device_QueryInterface, -@@ -4596,14 +5070,24 @@ static const struct ID3D12Device7Vtbl d3d12_device_vtbl = +@@ -4596,14 +5222,24 @@ static const struct ID3D12Device7Vtbl d3d12_device_vtbl = /* ID3D12Device7 methods */ d3d12_device_AddToStateObject, d3d12_device_CreateProtectedResourceSession1, @@ -16772,7 +23412,7 @@ index 17c7ccb3e31..a394e3f7592 100644 } static void *device_worker_main(void *arg) -@@ -4646,13 +5130,15 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, +@@ -4646,13 +5282,15 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr; @@ -16789,7 +23429,7 @@ index 17c7ccb3e31..a394e3f7592 100644 device->adapter_luid = create_info->adapter_luid; device->removed_reason = S_OK; -@@ -4894,28 +5380,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha +@@ -4894,28 +5532,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha IUnknown *vkd3d_get_device_parent(ID3D12Device *device) { @@ -16883,7 +23523,7 @@ index 89764d0901d..179999148bc 100644 HRESULT hr; diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 08cc110e8f7..6ba29c18004 100644 +index 08cc110e8f7..199d8043ffe 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -2045,6 +2045,9 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState @@ -16896,7 +23536,16 @@ index 08cc110e8f7..6ba29c18004 100644 vkd3d_free(state); d3d12_device_release(device); -@@ -2413,8 +2416,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st +@@ -2156,6 +2159,8 @@ static unsigned int feature_flags_compile_option(const struct d3d12_device *devi + flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_INT64; + if (device->feature_options.DoublePrecisionFloatShaderOps) + flags |= 
VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLOAT64; ++ if (device->feature_options1.WaveOps) ++ flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS; + + return flags; + } +@@ -2413,8 +2418,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_shader_interface_info shader_interface; struct vkd3d_shader_descriptor_offset_info offset_info; @@ -16906,7 +23555,7 @@ index 08cc110e8f7..6ba29c18004 100644 VkPipelineLayout vk_pipeline_layout; HRESULT hr; -@@ -2425,17 +2428,31 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st +@@ -2425,17 +2430,31 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st if (!(root_signature = unsafe_impl_from_ID3D12RootSignature(desc->root_signature))) { @@ -16941,7 +23590,7 @@ index 08cc110e8f7..6ba29c18004 100644 target_info.extensions = device->vk_info.shader_extensions; target_info.extension_count = device->vk_info.shader_extension_count; -@@ -2476,6 +2493,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st +@@ -2476,6 +2495,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st { WARN("Failed to create Vulkan compute pipeline, hr %s.\n", debugstr_hresult(hr)); d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); @@ -16950,7 +23599,7 @@ index 08cc110e8f7..6ba29c18004 100644 return hr; } -@@ -2483,6 +2502,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st +@@ -2483,6 +2504,8 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st { VK_CALL(vkDestroyPipeline(device->vk_device, state->u.compute.vk_pipeline, NULL)); d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device); @@ -16959,7 +23608,7 @@ index 08cc110e8f7..6ba29c18004 100644 return hr; } -@@ -3156,7 +3177,7 @@ static HRESULT 
d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s +@@ -3156,7 +3179,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s ps_target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; ps_target_info.next = NULL; ps_target_info.entry_point = "main"; @@ -16968,7 +23617,7 @@ index 08cc110e8f7..6ba29c18004 100644 ps_target_info.extensions = vk_info->shader_extensions; ps_target_info.extension_count = vk_info->shader_extension_count; ps_target_info.parameters = ps_shader_parameters; -@@ -3186,7 +3207,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s +@@ -3186,7 +3209,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s memset(&target_info, 0, sizeof(target_info)); target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO; @@ -16977,7 +23626,7 @@ index 08cc110e8f7..6ba29c18004 100644 target_info.extensions = vk_info->shader_extensions; target_info.extension_count = vk_info->shader_extension_count; -@@ -3484,6 +3505,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s +@@ -3484,6 +3507,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s goto fail; state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; @@ -16986,7 +23635,7 @@ index 08cc110e8f7..6ba29c18004 100644 return S_OK; diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c -index ac79ae5ddff..58747342b5c 100644 +index ac79ae5ddff..11029c9f5f9 100644 --- a/libs/vkd3d/libs/vkd3d/utils.c +++ b/libs/vkd3d/libs/vkd3d/utils.c @@ -87,6 +87,8 @@ static const struct vkd3d_format vkd3d_formats[] = @@ -17020,6 +23669,37 @@ index ac79ae5ddff..58747342b5c 100644 return NULL; } +@@ -891,6 +901,30 @@ bool vkd3d_get_program_name(char program_name[PATH_MAX]) + return true; + } + ++#elif defined(WIN32) ++ ++bool vkd3d_get_program_name(char program_name[PATH_MAX]) ++{ ++ char buffer[MAX_PATH]; ++ char *p, *name; ++ 
size_t len; ++ ++ *program_name = '\0'; ++ len = GetModuleFileNameA(NULL, buffer, ARRAY_SIZE(buffer)); ++ if (!(len && len < MAX_PATH)) ++ return false; ++ ++ name = buffer; ++ if ((p = strrchr(name, '/'))) ++ name = p + 1; ++ if ((p = strrchr(name, '\\'))) ++ name = p + 1; ++ ++ len = strlen(name) + 1; ++ memcpy(program_name, name, len); ++ return true; ++} ++ + #else + + bool vkd3d_get_program_name(char program_name[PATH_MAX]) diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c index 7919b7d8760..29305fbdc63 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c @@ -17071,10 +23751,21 @@ index 7919b7d8760..29305fbdc63 100644 } vkd3d_shader_free_messages(messages); diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index b092bb26ded..e0a7acb306d 100644 +index b092bb26ded..5f60c8d90ad 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -55,7 +55,7 @@ +@@ -24,10 +24,6 @@ + #define VK_NO_PROTOTYPES + #define CONST_VTABLE + +-#ifdef _WIN32 +-# define _WIN32_WINNT 0x0600 /* for condition variables */ +-#endif +- + #include "vkd3d_common.h" + #include "vkd3d_blob.h" + #include "vkd3d_memory.h" +@@ -55,7 +51,7 @@ #define VKD3D_MAX_COMPATIBLE_FORMAT_COUNT 6u #define VKD3D_MAX_QUEUE_FAMILY_COUNT 3u @@ -17083,7 +23774,7 @@ index b092bb26ded..e0a7acb306d 100644 #define VKD3D_MAX_SHADER_STAGES 5u #define VKD3D_MAX_VK_SYNC_OBJECTS 4u #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u -@@ -128,11 +128,13 @@ struct vkd3d_vulkan_info +@@ -128,11 +124,13 @@ struct vkd3d_vulkan_info bool KHR_sampler_mirror_clamp_to_edge; bool KHR_timeline_semaphore; /* EXT device extensions */ @@ -17097,7 +23788,7 @@ index b092bb26ded..e0a7acb306d 100644 bool EXT_mutable_descriptor_type; bool EXT_robustness2; bool EXT_shader_demote_to_helper_invocation; -@@ -184,6 +186,7 @@ struct vkd3d_instance +@@ -184,6 +182,7 @@ struct vkd3d_instance struct vkd3d_vulkan_info vk_info; struct 
vkd3d_vk_global_procs vk_global_procs; void *libvulkan; @@ -17105,7 +23796,7 @@ index b092bb26ded..e0a7acb306d 100644 uint64_t config_flags; enum vkd3d_api_version api_version; -@@ -202,36 +205,11 @@ union vkd3d_thread_handle +@@ -202,61 +201,6 @@ union vkd3d_thread_handle void *handle; }; @@ -17114,11 +23805,11 @@ index b092bb26ded..e0a7acb306d 100644 - CRITICAL_SECTION lock; -}; - - struct vkd3d_cond - { - CONDITION_VARIABLE cond; - }; - +-struct vkd3d_cond +-{ +- CONDITION_VARIABLE cond; +-}; +- -static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) -{ - InitializeCriticalSection(&lock->lock); @@ -17139,10 +23830,35 @@ index b092bb26ded..e0a7acb306d 100644 - DeleteCriticalSection(&lock->lock); -} - - static inline void vkd3d_cond_init(struct vkd3d_cond *cond) +-static inline void vkd3d_cond_init(struct vkd3d_cond *cond) +-{ +- InitializeConditionVariable(&cond->cond); +-} +- +-static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) +-{ +- WakeConditionVariable(&cond->cond); +-} +- +-static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) +-{ +- WakeAllConditionVariable(&cond->cond); +-} +- +-static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) +-{ +- if (!SleepConditionVariableCS(&cond->cond, &lock->lock, INFINITE)) +- ERR("Could not sleep on the condition variable, error %lu.\n", GetLastError()); +-} +- +-static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) +-{ +-} +- + static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) { - InitializeConditionVariable(&cond->cond); -@@ -287,53 +265,11 @@ union vkd3d_thread_handle + return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; +@@ -287,98 +231,6 @@ union vkd3d_thread_handle void *handle; }; @@ -17151,11 +23867,11 @@ index b092bb26ded..e0a7acb306d 100644 - pthread_mutex_t lock; -}; - - struct vkd3d_cond - { - pthread_cond_t cond; - }; - +-struct vkd3d_cond +-{ +- pthread_cond_t 
cond; +-}; +- - -static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) -{ @@ -17193,10 +23909,55 @@ index b092bb26ded..e0a7acb306d 100644 - ERR("Could not destroy the mutex, error %d.\n", ret); -} - - static inline void vkd3d_cond_init(struct vkd3d_cond *cond) +-static inline void vkd3d_cond_init(struct vkd3d_cond *cond) +-{ +- int ret; +- +- ret = pthread_cond_init(&cond->cond, NULL); +- if (ret) +- ERR("Could not initialize the condition variable, error %d.\n", ret); +-} +- +-static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) +-{ +- int ret; +- +- ret = pthread_cond_signal(&cond->cond); +- if (ret) +- ERR("Could not signal the condition variable, error %d.\n", ret); +-} +- +-static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) +-{ +- int ret; +- +- ret = pthread_cond_broadcast(&cond->cond); +- if (ret) +- ERR("Could not broadcast the condition variable, error %d.\n", ret); +-} +- +-static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) +-{ +- int ret; +- +- ret = pthread_cond_wait(&cond->cond, &lock->lock); +- if (ret) +- ERR("Could not wait on the condition variable, error %d.\n", ret); +-} +- +-static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) +-{ +- int ret; +- +- ret = pthread_cond_destroy(&cond->cond); +- if (ret) +- ERR("Could not destroy the condition variable, error %d.\n", ret); +-} +- + # if HAVE_SYNC_BOOL_COMPARE_AND_SWAP + static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) { - int ret; -@@ -1279,6 +1215,7 @@ struct d3d12_pipeline_state +@@ -1279,6 +1131,7 @@ struct d3d12_pipeline_state struct d3d12_pipeline_uav_counter_state uav_counters; @@ -17204,7 +23965,7 @@ index b092bb26ded..e0a7acb306d 100644 struct d3d12_device *device; struct vkd3d_private_store private_store; -@@ -1735,7 +1672,7 @@ struct vkd3d_desc_object_cache +@@ -1735,7 +1588,7 @@ struct vkd3d_desc_object_cache /* ID3D12Device */ struct d3d12_device { @@ 
-17213,7 +23974,7 @@ index b092bb26ded..e0a7acb306d 100644 unsigned int refcount; VkDevice vk_device; -@@ -1743,6 +1680,7 @@ struct d3d12_device +@@ -1743,6 +1596,7 @@ struct d3d12_device struct vkd3d_vk_device_procs vk_procs; PFN_vkd3d_signal_event signal_event; size_t wchar_size; @@ -17221,7 +23982,7 @@ index b092bb26ded..e0a7acb306d 100644 struct vkd3d_gpu_va_allocator gpu_va_allocator; -@@ -1810,29 +1748,29 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 +@@ -1810,29 +1664,29 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); @@ -17256,7 +24017,7 @@ index b092bb26ded..e0a7acb306d 100644 } /* utils */ -@@ -1993,4 +1931,10 @@ static inline void vkd3d_prepend_struct(void *header, void *structure) +@@ -1993,4 +1847,10 @@ static inline void vkd3d_prepend_struct(void *header, void *structure) vkd3d_header->next = vkd3d_structure; } diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch deleted file mode 100644 index 9289af36..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch +++ /dev/null @@ -1,1024 +0,0 @@ -From 7365c2f891b2cdfa4b9610b143bdccafd80851ec Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Tue, 23 Apr 2024 08:01:19 +1000 -Subject: [PATCH] Updated vkd3d to 7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6eaeb4. 
- ---- - libs/vkd3d/include/private/vkd3d_common.h | 70 ++++++ - libs/vkd3d/include/vkd3d.h | 115 +++++++++- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 205 ++++++++++++++++-- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 2 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 72 ++++-- - libs/vkd3d/libs/vkd3d-shader/ir.c | 14 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 5 + - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 84 ------- - 9 files changed, 430 insertions(+), 138 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index f9df47d339c..1da73bcfb2e 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -492,6 +492,76 @@ static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) - #endif - } - -+struct vkd3d_cond -+{ -+#ifdef _WIN32 -+ CONDITION_VARIABLE cond; -+#else -+ pthread_cond_t cond; -+#endif -+}; -+ -+static inline void vkd3d_cond_init(struct vkd3d_cond *cond) -+{ -+#ifdef _WIN32 -+ InitializeConditionVariable(&cond->cond); -+#else -+ int ret; -+ -+ if ((ret = pthread_cond_init(&cond->cond, NULL))) -+ ERR("Failed to initialise the condition variable, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) -+{ -+#ifdef _WIN32 -+ WakeConditionVariable(&cond->cond); -+#else -+ int ret; -+ -+ if ((ret = pthread_cond_signal(&cond->cond))) -+ ERR("Failed to signal the condition variable, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) -+{ -+#ifdef _WIN32 -+ WakeAllConditionVariable(&cond->cond); -+#else -+ int ret; -+ -+ if ((ret = pthread_cond_broadcast(&cond->cond))) -+ ERR("Failed to broadcast the condition variable, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) -+{ -+#ifdef _WIN32 -+ if (!SleepConditionVariableCS(&cond->cond, &lock->lock, 
INFINITE)) -+ ERR("Failed to wait on the condition variable, error %lu.\n", GetLastError()); -+#else -+ int ret; -+ -+ if ((ret = pthread_cond_wait(&cond->cond, &lock->lock))) -+ ERR("Failed to wait on the condition variable, ret %d.\n", ret); -+#endif -+} -+ -+static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) -+{ -+#ifdef _WIN32 -+ /* Nothing to do. */ -+#else -+ int ret; -+ -+ if ((ret = pthread_cond_destroy(&cond->cond))) -+ ERR("Failed to destroy the condition variable, ret %d.\n", ret); -+#endif -+} -+ - static inline void vkd3d_parse_version(const char *version, int *major, int *minor) - { - *major = atoi(version); -diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h -index aa68b70e1bf..71c56331d86 100644 ---- a/libs/vkd3d/include/vkd3d.h -+++ b/libs/vkd3d/include/vkd3d.h -@@ -46,21 +46,37 @@ extern "C" { - * \since 1.0 - */ - -+/** The type of a chained structure. */ - enum vkd3d_structure_type - { -- /* 1.0 */ -+ /** The structure is a vkd3d_instance_create_info structure. */ - VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, -+ /** The structure is a vkd3d_device_create_info structure. */ - VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO, -+ /** The structure is a vkd3d_image_resource_create_info structure. */ - VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO, - -- /* 1.1 */ -+ /** -+ * The structure is a vkd3d_optional_instance_extensions_info structure. -+ * \since 1.1 -+ */ - VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO, - -- /* 1.2 */ -+ /** -+ * The structure is a vkd3d_optional_device_extensions_info structure. -+ * \since 1.2 -+ */ - VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO, -+ /** -+ * The structure is a vkd3d_application_info structure. -+ * \since 1.2 -+ */ - VKD3D_STRUCTURE_TYPE_APPLICATION_INFO, - -- /* 1.3 */ -+ /** -+ * The structure is a vkd3d_host_time_domain_info structure. 
-+ * \since 1.3 -+ */ - VKD3D_STRUCTURE_TYPE_HOST_TIME_DOMAIN_INFO, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_STRUCTURE_TYPE), -@@ -93,54 +109,131 @@ typedef HRESULT (*PFN_vkd3d_join_thread)(void *thread); - - struct vkd3d_instance; - -+/** -+ * A chained structure containing instance creation parameters. -+ */ - struct vkd3d_instance_create_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** An pointer to a function to signal events. */ - PFN_vkd3d_signal_event pfn_signal_event; -+ /** -+ * An optional pointer to a function to create threads. If this is NULL vkd3d will use a -+ * function of its choice, depending on the platform. It must be NULL if and only if -+ * pfn_join_thread is NULL. -+ */ - PFN_vkd3d_create_thread pfn_create_thread; -+ /** -+ * An optional pointer to a function to join threads. If this is NULL vkd3d will use a -+ * function of its choice, depending on the platform. It must be NULL if and only if -+ * pfn_create_thread is NULL. -+ */ - PFN_vkd3d_join_thread pfn_join_thread; -+ /** The size of type WCHAR. It must be 2 or 4 and should normally be set to sizeof(WCHAR). */ - size_t wchar_size; - -- /* If set to NULL, libvkd3d loads libvulkan. */ -+ /** -+ * A pointer to the vkGetInstanceProcAddr Vulkan function, which will be used to load all the -+ * other Vulkan functions. If set to NULL, vkd3d will search and use the Vulkan loader. -+ */ - PFN_vkGetInstanceProcAddr pfn_vkGetInstanceProcAddr; - -+ /** -+ * A list of Vulkan instance extensions to request. They are intended as required, so instance -+ * creation will fail if any of them is not available. -+ */ - const char * const *instance_extensions; -+ /** The number of elements in the instance_extensions array. */ - uint32_t instance_extension_count; - }; - --/* Extends vkd3d_instance_create_info. Available since 1.1. 
*/ -+/** -+ * A chained structure to specify optional instance extensions. -+ * -+ * This structure extends vkd3d_instance_create_info. -+ * -+ * \since 1.1 -+ */ - struct vkd3d_optional_instance_extensions_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** -+ * A list of optional Vulkan instance extensions to request. Instance creation does not fail if -+ * they are not available. -+ */ - const char * const *extensions; -+ /** The number of elements in the extensions array. */ - uint32_t extension_count; - }; - --/* Extends vkd3d_instance_create_info. Available since 1.2. */ -+/** -+ * A chained structure to specify application information. -+ * -+ * This structure extends vkd3d_instance_create_info. -+ * -+ * \since 1.2 -+ */ - struct vkd3d_application_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_APPLICATION_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** -+ * The application's name, to be passed to the Vulkan implementation. If it is NULL, a name is -+ * computed from the process executable filename. If that cannot be done, the empty string is -+ * used. -+ */ - const char *application_name; -+ /** The application's version, to be passed to the Vulkan implementation. */ - uint32_t application_version; - -- const char *engine_name; /* "vkd3d" if NULL */ -- uint32_t engine_version; /* vkd3d version if engine_name is NULL */ -- -+ /** -+ * The engine name, to be passed to the Vulkan implementation. If it is NULL, "vkd3d" is used. -+ */ -+ const char *engine_name; -+ /** -+ * The engine version, to be passed to the Vulkan implementation. If it is 0, the version is -+ * computed from the vkd3d library version. 
-+ */ -+ uint32_t engine_version; -+ -+ /** -+ * The vkd3d API version to use, to guarantee backward compatibility of the shared library. If -+ * this chained structure is not used then VKD3D_API_VERSION_1_0 is used. -+ */ - enum vkd3d_api_version api_version; - }; - --/* Extends vkd3d_instance_create_info. Available since 1.3. */ -+/** -+ * A chained structure to specify the host time domain. -+ * -+ * This structure extends vkd3d_instance_create_info. -+ * -+ * \since 1.3 -+ */ - struct vkd3d_host_time_domain_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_HOST_TIME_DOMAIN_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** -+ * The number of clock ticks per second, used for GetClockCalibration(). It should normally -+ * match the expected result of QueryPerformanceFrequency(). If this chained structure is not -+ * used then 10 millions is used, which means that each tick is a tenth of microsecond, or -+ * equivalently 100 nanoseconds. 
-+ */ - uint64_t ticks_per_second; - }; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 220ba773887..d07d5adee70 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -419,6 +419,11 @@ enum dx_intrinsic_opcode - DX_FLATTENED_THREAD_ID_IN_GROUP = 96, - DX_MAKE_DOUBLE = 101, - DX_SPLIT_DOUBLE = 102, -+ DX_LOAD_OUTPUT_CONTROL_POINT = 103, -+ DX_LOAD_PATCH_CONSTANT = 104, -+ DX_DOMAIN_LOCATION = 105, -+ DX_STORE_PATCH_CONSTANT = 106, -+ DX_OUTPUT_CONTROL_POINT_ID = 107, - DX_PRIMITIVE_ID = 108, - DX_LEGACY_F32TOF16 = 130, - DX_LEGACY_F16TOF32 = 131, -@@ -799,6 +804,7 @@ struct sm6_parser - - struct vkd3d_shader_dst_param *output_params; - struct vkd3d_shader_dst_param *input_params; -+ struct vkd3d_shader_dst_param *patch_constant_params; - uint32_t io_regs_declared[(VKD3DSPR_COUNT + 0x1f) / 0x20]; - - struct sm6_function *functions; -@@ -2433,10 +2439,12 @@ static void register_index_address_init(struct vkd3d_shader_register_index *idx, - if (sm6_value_is_constant(address)) - { - idx->offset = sm6_value_get_constant_uint(address); -+ idx->rel_addr = NULL; - } - else if (sm6_value_is_undef(address)) - { - idx->offset = 0; -+ idx->rel_addr = NULL; - } - else - { -@@ -2515,7 +2523,7 @@ static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, - * overestimate the value count somewhat, but this should be no problem. */ - value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); - sm6->value_capacity = max(sm6->value_capacity, value_count); -- sm6->functions[sm6->function_count].value_count = value_count; -+ sm6->functions[sm6->function_count++].value_count = value_count; - /* The value count returns to its previous value after handling a function. 
*/ - if (value_count < SIZE_MAX) - value_count = old_value_count; -@@ -3689,12 +3697,35 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( - } - - static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, -- enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) -+ bool is_input, enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) - { -+ enum vkd3d_shader_type shader_type = sm6->p.program.shader_version.type; -+ bool is_patch_constant, is_control_point; - struct vkd3d_shader_dst_param *param; - const struct signature_element *e; - unsigned int i, count; - -+ is_patch_constant = reg_type == VKD3DSPR_PATCHCONST; -+ -+ is_control_point = false; -+ if (!is_patch_constant) -+ { -+ switch (shader_type) -+ { -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ case VKD3D_SHADER_TYPE_GEOMETRY: -+ is_control_point = is_input; -+ break; -+ -+ case VKD3D_SHADER_TYPE_HULL: -+ is_control_point = true; -+ break; -+ -+ default: -+ break; -+ } -+ } -+ - for (i = 0; i < s->element_count; ++i) - { - e = &s->elements[i]; -@@ -3709,8 +3740,18 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade - - dst_param_io_init(param, e, reg_type); - count = 0; -- if (e->register_count > 1) -+ -+ if (is_control_point) -+ { -+ if (reg_type == VKD3DSPR_OUTPUT) -+ param->reg.idx[count].rel_addr = instruction_array_create_outpointid_param(&sm6->p.program.instructions); -+ param->reg.idx[count++].offset = 0; -+ } -+ -+ if (e->register_count > 1 || (is_patch_constant && vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) - param->reg.idx[count++].offset = 0; -+ -+ assert(count < ARRAY_SIZE(param->reg.idx)); - param->reg.idx[count++].offset = i; - param->reg.idx_count = count; - } -@@ -3718,12 +3759,21 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade - - static void sm6_parser_init_output_signature(struct sm6_parser *sm6, const 
struct shader_signature *output_signature) - { -- sm6_parser_init_signature(sm6, output_signature, VKD3DSPR_OUTPUT, sm6->output_params); -+ sm6_parser_init_signature(sm6, output_signature, false, VKD3DSPR_OUTPUT, sm6->output_params); - } - - static void sm6_parser_init_input_signature(struct sm6_parser *sm6, const struct shader_signature *input_signature) - { -- sm6_parser_init_signature(sm6, input_signature, VKD3DSPR_INPUT, sm6->input_params); -+ sm6_parser_init_signature(sm6, input_signature, true, VKD3DSPR_INPUT, sm6->input_params); -+} -+ -+static void sm6_parser_init_patch_constant_signature(struct sm6_parser *sm6, -+ const struct shader_signature *patch_constant_signature) -+{ -+ bool is_input = sm6->p.program.shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; -+ -+ sm6_parser_init_signature(sm6, patch_constant_signature, is_input, VKD3DSPR_PATCHCONST, -+ sm6->patch_constant_params); - } - - static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) -@@ -4752,6 +4802,33 @@ static void sm6_parser_emit_dx_discard(struct sm6_parser *sm6, enum dx_intrinsic - src_param_init_from_value(src_param, operands[0]); - } - -+static void sm6_parser_emit_dx_domain_location(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ unsigned int component_idx; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ -+ if ((component_idx = sm6_value_get_constant_uint(operands[0])) >= 3) -+ { -+ WARN("Invalid component index %u.\n", component_idx); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid domain location component index %u.", component_idx); -+ component_idx = 0; -+ } -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ sm6_parser_dcl_register_builtin(sm6, VKD3DSIH_DCL_INPUT, 
VKD3DSPR_TESSCOORD, VKD3D_DATA_FLOAT, 3); -+ vsir_register_init(&src_param->reg, VKD3DSPR_TESSCOORD, VKD3D_DATA_FLOAT, 0); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ src_param_init_scalar(src_param, component_idx); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_dot(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -4989,18 +5066,43 @@ static void sm6_parser_emit_dx_tertiary(struct sm6_parser *sm6, enum dx_intrinsi - static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -+ bool is_control_point = op == DX_LOAD_OUTPUT_CONTROL_POINT; -+ bool is_patch_constant = op == DX_LOAD_PATCH_CONSTANT; - struct vkd3d_shader_instruction *ins = state->ins; -+ unsigned int count, row_index, column_index; -+ const struct vkd3d_shader_dst_param *params; - struct vkd3d_shader_src_param *src_param; - const struct shader_signature *signature; -- unsigned int row_index, column_index; - const struct signature_element *e; - - row_index = sm6_value_get_constant_uint(operands[0]); - column_index = sm6_value_get_constant_uint(operands[2]); - -+ if (is_control_point && operands[3]->is_undefined) -+ { -+ /* dxcompiler will compile source which does this, so let it pass. 
*/ -+ WARN("Control point id is undefined.\n"); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNDEFINED_OPERAND, -+ "The index for a control point load is undefined."); -+ } -+ - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); - -- signature = &sm6->p.program.input_signature; -+ if (is_patch_constant) -+ { -+ signature = &sm6->p.program.patch_constant_signature; -+ params = sm6->patch_constant_params; -+ } -+ else if (is_control_point) -+ { -+ signature = &sm6->p.program.output_signature; -+ params = sm6->output_params; -+ } -+ else -+ { -+ signature = &sm6->p.program.input_signature; -+ params = sm6->input_params; -+ } - if (row_index >= signature->element_count) - { - WARN("Invalid row index %u.\n", row_index); -@@ -5012,10 +5114,18 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin - - if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) - return; -- src_param->reg = sm6->input_params[row_index].reg; -+ src_param->reg = params[row_index].reg; - src_param_init_scalar(src_param, column_index); -+ count = 0; -+ - if (e->register_count > 1) -- register_index_address_init(&src_param->reg.idx[0], operands[1], sm6); -+ register_index_address_init(&src_param->reg.idx[count++], operands[1], sm6); -+ -+ if (!is_patch_constant && !operands[3]->is_undefined) -+ { -+ assert(src_param->reg.idx_count > count); -+ register_index_address_init(&src_param->reg.idx[count], operands[3], sm6); -+ } - - instruction_dst_param_init_ssa_scalar(ins, sm6); - } -@@ -5040,6 +5150,12 @@ static void sm6_parser_emit_dx_make_double(struct sm6_parser *sm6, enum dx_intri - instruction_dst_param_init_ssa_scalar(ins, sm6); - } - -+static void sm6_parser_emit_dx_output_control_point_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT); -+} -+ - static 
void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -5473,6 +5589,7 @@ static void sm6_parser_emit_dx_split_double(struct sm6_parser *sm6, enum dx_intr - static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -+ bool is_patch_constant = op == DX_STORE_PATCH_CONSTANT; - struct vkd3d_shader_instruction *ins = state->ins; - struct vkd3d_shader_src_param *src_param; - struct vkd3d_shader_dst_param *dst_param; -@@ -5484,7 +5601,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr - row_index = sm6_value_get_constant_uint(operands[0]); - column_index = sm6_value_get_constant_uint(operands[2]); - -- signature = &sm6->p.program.output_signature; -+ signature = is_patch_constant ? &sm6->p.program.patch_constant_signature : &sm6->p.program.output_signature; - if (row_index >= signature->element_count) - { - WARN("Invalid row index %u.\n", row_index); -@@ -5516,7 +5633,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr - if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) - return; - dst_param_init_scalar(dst_param, column_index); -- dst_param->reg = sm6->output_params[row_index].reg; -+ dst_param->reg = is_patch_constant ? 
sm6->patch_constant_params[row_index].reg : sm6->output_params[row_index].reg; - if (e->register_count > 1) - register_index_address_init(&dst_param->reg.idx[0], operands[1], sm6); - -@@ -5736,6 +5853,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_DERIV_FINEX ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DERIV_FINEY ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DISCARD ] = {"v", "1", sm6_parser_emit_dx_discard}, -+ [DX_DOMAIN_LOCATION ] = {"f", "c", sm6_parser_emit_dx_domain_location}, - [DX_DOT2 ] = {"g", "RRRR", sm6_parser_emit_dx_dot}, - [DX_DOT3 ] = {"g", "RRRRRR", sm6_parser_emit_dx_dot}, - [DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot}, -@@ -5765,8 +5883,11 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_LEGACY_F16TOF32 ] = {"f", "i", sm6_parser_emit_dx_unary}, - [DX_LEGACY_F32TOF16 ] = {"i", "f", sm6_parser_emit_dx_unary}, - [DX_LOAD_INPUT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, -+ [DX_LOAD_OUTPUT_CONTROL_POINT ] = {"o", "ii8i", sm6_parser_emit_dx_load_input}, -+ [DX_LOAD_PATCH_CONSTANT ] = {"o", "ii8", sm6_parser_emit_dx_load_input}, - [DX_LOG ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, -+ [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, - [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, - [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, - [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, -@@ -5788,6 +5909,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_SPLIT_DOUBLE ] = {"S", "d", sm6_parser_emit_dx_split_double}, - [DX_SQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_STORE_OUTPUT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, -+ [DX_STORE_PATCH_CONSTANT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, - [DX_TAN ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_TEX2DMS_GET_SAMPLE_POS ] = 
{"o", "Hi", sm6_parser_emit_dx_get_sample_pos}, - [DX_TEXTURE_GATHER ] = {"o", "HHffffiic", sm6_parser_emit_dx_texture_gather}, -@@ -7286,11 +7408,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - struct sm6_block *code_block; - struct sm6_value *dst; - -- if (sm6->function_count) -- { -- FIXME("Multiple functions are not supported yet.\n"); -- return VKD3D_ERROR_INVALID_SHADER; -- } - if (!(function->declaration = sm6_parser_next_function_definition(sm6))) - { - WARN("Failed to find definition to match function body.\n"); -@@ -8976,10 +9093,15 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons - { - return ret; - } -- /* TODO: patch constant signature in operand 2. */ -+ if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], -+ &sm6->p.program.patch_constant_signature, tessellator_domain)) < 0) -+ { -+ return ret; -+ } - - sm6_parser_init_input_signature(sm6, &sm6->p.program.input_signature); - sm6_parser_init_output_signature(sm6, &sm6->p.program.output_signature); -+ sm6_parser_init_patch_constant_signature(sm6, &sm6->p.program.patch_constant_signature); - - return VKD3D_OK; - } -@@ -9509,9 +9631,10 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 - static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *source_name, - struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) - { -+ size_t count, length, function_count, expected_function_count, byte_code_size = dxbc_desc->byte_code_size; -+ const struct shader_signature *patch_constant_signature = &sm6->p.program.patch_constant_signature; - const struct shader_signature *output_signature = &sm6->p.program.output_signature; - const struct shader_signature *input_signature = &sm6->p.program.input_signature; -- size_t count, length, function_count, byte_code_size = dxbc_desc->byte_code_size; - const struct 
vkd3d_shader_location location = {.source_name = source_name}; - uint32_t version_token, dxil_version, token_count, magic; - const uint32_t *byte_code = dxbc_desc->byte_code; -@@ -9674,7 +9797,9 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *sou - } - - if (!(sm6->output_params = vsir_program_get_dst_params(&sm6->p.program, output_signature->element_count)) -- || !(sm6->input_params = vsir_program_get_dst_params(&sm6->p.program, input_signature->element_count))) -+ || !(sm6->input_params = vsir_program_get_dst_params(&sm6->p.program, input_signature->element_count)) -+ || !(sm6->patch_constant_params = vsir_program_get_dst_params(&sm6->p.program, -+ patch_constant_signature->element_count))) - { - ERR("Failed to allocate input/output parameters.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -@@ -9705,6 +9830,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *sou - "Out of memory allocating DXIL value array."); - return VKD3D_ERROR_OUT_OF_MEMORY; - } -+ sm6->function_count = 0; - sm6->ssa_next_id = 1; - - if ((ret = sm6_parser_globals_init(sm6)) < 0) -@@ -9754,7 +9880,8 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *sou - return ret; - } - -- if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count)) -+ if (!sm6_parser_require_space(sm6, output_signature->element_count + input_signature->element_count -+ + patch_constant_signature->element_count)) - { - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Out of memory emitting shader signature declarations."); -@@ -9771,9 +9898,41 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *sou - return VKD3D_ERROR_INVALID_SHADER; - } - -- assert(sm6->function_count == 1); -- if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) -- return ret; -+ if (version.type == VKD3D_SHADER_TYPE_HULL) -+ { -+ 
sm6_parser_add_instruction(sm6, VKD3DSIH_HS_CONTROL_POINT_PHASE); -+ -+ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) -+ return ret; -+ -+ if (!(fn = sm6_parser_get_function(sm6, sm6->patch_constant_function))) -+ { -+ WARN("Failed to find patch constant function '%s'.\n", sm6->patch_constant_function); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "Failed to find the patch constant function '%s' for a hull shader.", -+ sm6->patch_constant_function); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ sm6_parser_add_instruction(sm6, VKD3DSIH_HS_FORK_PHASE); -+ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) -+ return ret; -+ -+ expected_function_count = 2; -+ } -+ else -+ { -+ if ((ret = sm6_function_emit_blocks(fn, sm6)) < 0) -+ return ret; -+ expected_function_count = 1; -+ } -+ -+ if (sm6->function_count > expected_function_count) -+ { -+ FIXME("%zu unhandled functions.\n", sm6->function_count - expected_function_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "%zu functions were not emitted.", sm6->function_count - expected_function_count); -+ } - - dxil_block_destroy(&sm6->root_block); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 3e8dd2c486b..3e482a5fc70 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -79,6 +79,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: -+ case VKD3DSIH_NOP: - break; - case VKD3DSIH_RET: - shader_glsl_ret(generator, instruction); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 7a8fe4de437..5e3010c4353 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -392,7 +392,7 @@ struct hlsl_attribute - struct hlsl_reg_reservation - { - char reg_type; -- 
unsigned int reg_index; -+ unsigned int reg_space, reg_index; - - char offset_type; - unsigned int offset_index; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 0c196b77595..f99f322d8d1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -1197,17 +1197,18 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl - return true; - } - --static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) -+static bool parse_reservation_index(const char *string, char *type, uint32_t *index) - { -- struct hlsl_reg_reservation reservation = {0}; -+ if (!sscanf(string + 1, "%u", index)) -+ return false; - -- if (!sscanf(reg_string + 1, "%u", &reservation.reg_index)) -- { -- FIXME("Unsupported register reservation syntax.\n"); -- return reservation; -- } -- reservation.reg_type = ascii_tolower(reg_string[0]); -- return reservation; -+ *type = ascii_tolower(string[0]); -+ return true; -+} -+ -+static bool parse_reservation_space(const char *string, uint32_t *space) -+{ -+ return !ascii_strncasecmp(string, "space", 5) && sscanf(string + 5, "%u", space); - } - - static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const char *reg_string, -@@ -5675,8 +5676,8 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %type param_list - %type parameters - --%type register_opt --%type packoffset_opt -+%type register_reservation -+%type packoffset_reservation - - %type texture_type texture_ms_type uav_type rov_type - -@@ -6300,12 +6301,12 @@ colon_attribute: - $$.reg_reservation.reg_type = 0; - $$.reg_reservation.offset_type = 0; - } -- | register_opt -+ | register_reservation - { - $$.semantic = (struct hlsl_semantic){0}; - $$.reg_reservation = $1; - } -- | packoffset_opt -+ | packoffset_reservation - { - $$.semantic = (struct hlsl_semantic){0}; - $$.reg_reservation = $1; -@@ -6327,22 +6328,57 @@ 
semantic: - } - - /* FIXME: Writemasks */ --register_opt: -+register_reservation: - ':' KW_REGISTER '(' any_identifier ')' - { -- $$ = parse_reg_reservation($4); -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $4); -+ - vkd3d_free($4); - } - | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' - { -- FIXME("Ignoring shader target %s in a register reservation.\n", debugstr_a($4)); -+ memset(&$$, 0, sizeof($$)); -+ if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) -+ { -+ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); -+ } -+ else if (parse_reservation_space($6, &$$.reg_space)) -+ { -+ if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $4); -+ } -+ else -+ { -+ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register or space reservation '%s'.", $6); -+ } -+ - vkd3d_free($4); -+ vkd3d_free($6); -+ } -+ | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' -+ { -+ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); - -- $$ = parse_reg_reservation($6); -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) -+ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $6); -+ -+ if (!parse_reservation_space($8, &$$.reg_space)) -+ hlsl_error(ctx, &@8, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register space reservation '%s'.", $8); -+ -+ vkd3d_free($4); - vkd3d_free($6); -+ vkd3d_free($8); - } - --packoffset_opt: -+packoffset_reservation: - ':' KW_PACKOFFSET '(' any_identifier ')' - { - $$ = parse_packoffset(ctx, $4, NULL, &@$); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c 
b/libs/vkd3d/libs/vkd3d-shader/ir.c -index eca18f4eb28..1f8d60c62ac 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -636,11 +636,14 @@ static bool control_point_normaliser_is_in_control_point_phase(const struct cont - return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; - } - --static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( -+struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( - struct vkd3d_shader_instruction_array *instructions) - { - struct vkd3d_shader_src_param *rel_addr; - -+ if (instructions->outpointid_param) -+ return instructions->outpointid_param; -+ - if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) - return NULL; - -@@ -648,6 +651,7 @@ static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( - rel_addr->swizzle = 0; - rel_addr->modifiers = 0; - -+ instructions->outpointid_param = rel_addr; - return rel_addr; - } - -@@ -3344,6 +3348,14 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - enum vkd3d_result ret; - size_t i; - -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) -+ { -+ FIXME("Hull shaders are not supported.\n"); -+ vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "The structurizer does not support hull shaders."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ - memset(cfg, 0, sizeof(*cfg)); - cfg->message_context = message_context; - cfg->program = program; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 1f4320968d3..4434e6e98f2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -202,6 +202,7 @@ enum vkd3d_shader_error - VKD3D_SHADER_WARNING_DXIL_INVALID_MASK = 8307, - VKD3D_SHADER_WARNING_DXIL_INVALID_OPERATION = 8308, - 
VKD3D_SHADER_WARNING_DXIL_IGNORING_ATTACHMENT = 8309, -+ VKD3D_SHADER_WARNING_DXIL_UNDEFINED_OPERAND = 8310, - - VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, - VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER = 9001, -@@ -1300,6 +1301,8 @@ struct vkd3d_shader_instruction_array - struct vkd3d_shader_immediate_constant_buffer **icbs; - size_t icb_capacity; - size_t icb_count; -+ -+ struct vkd3d_shader_src_param *outpointid_param; - }; - - bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instructions, unsigned int reserve); -@@ -1310,6 +1313,8 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins - struct vkd3d_shader_immediate_constant_buffer *icb); - bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, - unsigned int dst, unsigned int src); -+struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( -+ struct vkd3d_shader_instruction_array *instructions); - void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); - - enum vkd3d_shader_config_flags -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index e0a7acb306d..5f60c8d90ad 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -24,10 +24,6 @@ - #define VK_NO_PROTOTYPES - #define CONST_VTABLE - --#ifdef _WIN32 --# define _WIN32_WINNT 0x0600 /* for condition variables */ --#endif -- - #include "vkd3d_common.h" - #include "vkd3d_blob.h" - #include "vkd3d_memory.h" -@@ -205,36 +201,6 @@ union vkd3d_thread_handle - void *handle; - }; - --struct vkd3d_cond --{ -- CONDITION_VARIABLE cond; --}; -- --static inline void vkd3d_cond_init(struct vkd3d_cond *cond) --{ -- InitializeConditionVariable(&cond->cond); --} -- --static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) --{ -- WakeConditionVariable(&cond->cond); --} -- --static inline void vkd3d_cond_broadcast(struct vkd3d_cond 
*cond) --{ -- WakeAllConditionVariable(&cond->cond); --} -- --static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) --{ -- if (!SleepConditionVariableCS(&cond->cond, &lock->lock, INFINITE)) -- ERR("Could not sleep on the condition variable, error %lu.\n", GetLastError()); --} -- --static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) --{ --} -- - static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) - { - return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; -@@ -265,56 +231,6 @@ union vkd3d_thread_handle - void *handle; - }; - --struct vkd3d_cond --{ -- pthread_cond_t cond; --}; -- --static inline void vkd3d_cond_init(struct vkd3d_cond *cond) --{ -- int ret; -- -- ret = pthread_cond_init(&cond->cond, NULL); -- if (ret) -- ERR("Could not initialize the condition variable, error %d.\n", ret); --} -- --static inline void vkd3d_cond_signal(struct vkd3d_cond *cond) --{ -- int ret; -- -- ret = pthread_cond_signal(&cond->cond); -- if (ret) -- ERR("Could not signal the condition variable, error %d.\n", ret); --} -- --static inline void vkd3d_cond_broadcast(struct vkd3d_cond *cond) --{ -- int ret; -- -- ret = pthread_cond_broadcast(&cond->cond); -- if (ret) -- ERR("Could not broadcast the condition variable, error %d.\n", ret); --} -- --static inline void vkd3d_cond_wait(struct vkd3d_cond *cond, struct vkd3d_mutex *lock) --{ -- int ret; -- -- ret = pthread_cond_wait(&cond->cond, &lock->lock); -- if (ret) -- ERR("Could not wait on the condition variable, error %d.\n", ret); --} -- --static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) --{ -- int ret; -- -- ret = pthread_cond_destroy(&cond->cond); -- if (ret) -- ERR("Could not destroy the condition variable, error %d.\n", ret); --} -- - # if HAVE_SYNC_BOOL_COMPARE_AND_SWAP - static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) - { --- 
-2.43.0 - diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch deleted file mode 100644 index 6cdf5833..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch +++ /dev/null @@ -1,892 +0,0 @@ -From ddfe189d39a3dd3f1c99773c57bb0650e39e8354 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 24 Apr 2024 09:05:20 +1000 -Subject: [PATCH] Updated vkd3d to 46fca3f9f4a9b47b32e9dfbacda0f3d19536c02c. - ---- - libs/vkd3d/include/private/vkd3d_common.h | 2 + - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 12 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 77 +++++++---- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 44 +++++-- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 12 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 32 +++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 5 + - libs/vkd3d/libs/vkd3d-shader/ir.c | 137 ++++++++++++++++---- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 5 + - 10 files changed, 248 insertions(+), 80 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 1da73bcfb2e..b0e9230dab6 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -340,6 +340,8 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) - return (x > y) - (x < y); - } - -+#define VKD3D_BITMAP_SIZE(x) (((x) + 0x1f) >> 5) -+ - static inline bool bitmap_clear(uint32_t *map, unsigned int idx) - { - return map[idx >> 5] &= ~(1u << (idx & 0x1f)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index ace7694a59e..55d9ecf707d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -216,7 +216,7 @@ struct vkd3d_shader_sm1_parser - struct vkd3d_shader_parser p; - - #define 
MAX_CONSTANT_COUNT 8192 -- uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32]; -+ uint32_t constant_def_mask[3][VKD3D_BITMAP_SIZE(MAX_CONSTANT_COUNT)]; - }; - - /* This table is not order or position dependent. */ -@@ -1517,6 +1517,11 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: - return D3DXPC_OBJECT; -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: - break; -@@ -1614,6 +1619,11 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - case HLSL_CLASS_STRING: - return D3DXPT_STRING; - -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index d07d5adee70..6a1fb6bddb7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -805,7 +805,7 @@ struct sm6_parser - struct vkd3d_shader_dst_param *output_params; - struct vkd3d_shader_dst_param *input_params; - struct vkd3d_shader_dst_param *patch_constant_params; -- uint32_t io_regs_declared[(VKD3DSPR_COUNT + 0x1f) / 0x20]; -+ uint32_t io_regs_declared[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; - - struct sm6_function *functions; - size_t function_count; -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 98443797543..168378e6b42 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -222,7 +222,7 @@ static bool technique_matches_version(const struct hlsl_ir_var *var, const struc - { - const struct hlsl_type *type = var->data_type; - -- if (type->base_type != HLSL_TYPE_TECHNIQUE) -+ if (type->class != 
HLSL_CLASS_TECHNIQUE) - return false; - - return type->e.version >= fx->min_technique_version && type->e.version <= fx->max_technique_version; -@@ -353,8 +353,6 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - { - [HLSL_TYPE_PIXELSHADER] = "PixelShader", - [HLSL_TYPE_VERTEXSHADER] = "VertexShader", -- [HLSL_TYPE_RENDERTARGETVIEW] = "RenderTargetView", -- [HLSL_TYPE_DEPTHSTENCILVIEW] = "DepthStencilView", - }; - static const char * const texture_type_names[] = - { -@@ -380,19 +378,30 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer", - }; - -- if (type->class == HLSL_CLASS_TEXTURE) -- return texture_type_names[type->sampler_dim]; -+ switch (type->class) -+ { -+ case HLSL_CLASS_TEXTURE: -+ return texture_type_names[type->sampler_dim]; - -- if (type->class == HLSL_CLASS_UAV) -- return uav_type_names[type->sampler_dim]; -+ case HLSL_CLASS_UAV: -+ return uav_type_names[type->sampler_dim]; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ return "DepthStencilView"; -+ -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ return "RenderTargetView"; -+ -+ case HLSL_CLASS_OBJECT: -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_PIXELSHADER: -+ case HLSL_TYPE_VERTEXSHADER: -+ return object_type_names[type->base_type]; -+ default: -+ return type->name; -+ } - -- switch (type->base_type) -- { -- case HLSL_TYPE_PIXELSHADER: -- case HLSL_TYPE_VERTEXSHADER: -- case HLSL_TYPE_RENDERTARGETVIEW: -- case HLSL_TYPE_DEPTHSTENCILVIEW: -- return object_type_names[type->base_type]; - default: - return type->name; - } -@@ -426,7 +435,9 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - put_u32_unaligned(buffer, 1); - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - put_u32_unaligned(buffer, 2); -@@ -437,6 +448,9 @@ static uint32_t 
write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - break; - - case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_TECHNIQUE: - vkd3d_unreachable(); - - case HLSL_CLASS_SAMPLER: -@@ -510,21 +524,25 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - - put_u32_unaligned(buffer, uav_type[type->sampler_dim]); - } -+ else if (type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) -+ { -+ put_u32_unaligned(buffer, 20); -+ } -+ else if (type->class == HLSL_CLASS_RENDER_TARGET_VIEW) -+ { -+ put_u32_unaligned(buffer, 19); -+ } - else if (type->class == HLSL_CLASS_OBJECT) - { - static const uint32_t object_type[] = - { - [HLSL_TYPE_PIXELSHADER] = 5, - [HLSL_TYPE_VERTEXSHADER] = 6, -- [HLSL_TYPE_RENDERTARGETVIEW] = 19, -- [HLSL_TYPE_DEPTHSTENCILVIEW] = 20, - }; - - switch (type->base_type) - { -- case HLSL_TYPE_DEPTHSTENCILVIEW: - case HLSL_TYPE_PIXELSHADER: -- case HLSL_TYPE_RENDERTARGETVIEW: - case HLSL_TYPE_VERTEXSHADER: - put_u32_unaligned(buffer, object_type[type->base_type]); - break; -@@ -533,11 +551,17 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - return 0; - } - } -- else /* Numeric type */ -+ else if (hlsl_is_numeric_type(type)) - { - numeric_desc = get_fx_4_numeric_type_description(type, fx); - put_u32_unaligned(buffer, numeric_desc); - } -+ else -+ { -+ FIXME("Type %u is not supported.\n", type->class); -+ set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -+ return 0; -+ } - - return offset; - } -@@ -618,7 +642,7 @@ static void write_groups(struct fx_write_context *fx) - { - const struct hlsl_type *type = var->data_type; - -- if (type->base_type == HLSL_TYPE_EFFECT_GROUP) -+ if (type->class == HLSL_CLASS_EFFECT_GROUP) - write_group(var, fx); - } - } -@@ -831,9 +855,17 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); - return false; - -+ 
case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_UAV: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_VOID: - return false; -+ -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_TECHNIQUE: -+ /* This cannot appear as an extern variable. */ -+ break; - } - - vkd3d_unreachable(); -@@ -1011,6 +1043,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - /* Initializer */ - switch (type->class) - { -+ case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - break; -@@ -1018,8 +1051,6 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - case HLSL_CLASS_OBJECT: - switch (type->base_type) - { -- case HLSL_TYPE_RENDERTARGETVIEW: -- break; - case HLSL_TYPE_PIXELSHADER: - case HLSL_TYPE_VERTEXSHADER: - /* FIXME: write shader blobs, once parser support works. */ -@@ -1118,6 +1149,7 @@ static bool is_object_variable(const struct hlsl_ir_var *var) - - switch (type->class) - { -+ case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: -@@ -1128,7 +1160,6 @@ static bool is_object_variable(const struct hlsl_ir_var *var) - { - case HLSL_TYPE_PIXELSHADER: - case HLSL_TYPE_VERTEXSHADER: -- case HLSL_TYPE_RENDERTARGETVIEW: - return true; - default: - return false; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 5dd80ff1c3f..4fc1493bdce 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -363,8 +363,13 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - type->reg_size[HLSL_REGSET_UAVS] = 1; - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: - break; - } -@@ -425,10 +430,12 @@ 
static bool type_is_single_component(const struct hlsl_type *type) - { - switch (type->class) - { -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_OBJECT: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - return true; -@@ -439,6 +446,9 @@ static bool type_is_single_component(const struct hlsl_type *type) - case HLSL_CLASS_ARRAY: - return false; - -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: - break; - } -@@ -561,7 +571,9 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - } - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: -@@ -569,6 +581,9 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - assert(idx == 0); - break; - -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: - vkd3d_unreachable(); - } -@@ -934,13 +949,18 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - case HLSL_CLASS_ARRAY: - return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; - -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - return 1; - -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: - break; - } -@@ -997,9 +1017,9 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - if (t1->class == HLSL_CLASS_ARRAY) - return t1->e.array.elements_count == t2->e.array.elements_count - && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); -- 
if (t1->class == HLSL_CLASS_OBJECT) -+ if (t1->class == HLSL_CLASS_TECHNIQUE) - { -- if (t1->base_type == HLSL_TYPE_TECHNIQUE && t1->e.version != t2->e.version) -+ if (t1->e.version != t2->e.version) - return false; - } - -@@ -1089,9 +1109,8 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, - type->e.resource.format = old->e.resource.format; - break; - -- case HLSL_CLASS_OBJECT: -- if (type->base_type == HLSL_TYPE_TECHNIQUE) -- type->e.version = old->e.version; -+ case HLSL_CLASS_TECHNIQUE: -+ type->e.version = old->e.version; - break; - - default: -@@ -2355,9 +2374,14 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - } - return string; - -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: - break; - } -@@ -3533,12 +3557,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - {"dword", HLSL_CLASS_SCALAR, HLSL_TYPE_UINT, 1, 1}, - {"vector", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, - {"matrix", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, -- {"fxgroup", HLSL_CLASS_OBJECT, HLSL_TYPE_EFFECT_GROUP, 1, 1}, -- {"pass", HLSL_CLASS_OBJECT, HLSL_TYPE_PASS, 1, 1}, - {"pixelshader", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, - {"vertexshader", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, -- {"RenderTargetView",HLSL_CLASS_OBJECT, HLSL_TYPE_RENDERTARGETVIEW, 1, 1}, -- {"DepthStencilView",HLSL_CLASS_OBJECT, HLSL_TYPE_DEPTHSTENCILVIEW, 1, 1}, - }; - - static const struct -@@ -3650,6 +3670,10 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - } - - ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); -+ 
hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); - -@@ -3662,7 +3686,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - - for (i = 0; i < ARRAY_SIZE(technique_types); ++i) - { -- type = hlsl_new_type(ctx, technique_types[i].name, HLSL_CLASS_OBJECT, HLSL_TYPE_TECHNIQUE, 1, 1); -+ type = hlsl_new_simple_type(ctx, technique_types[i].name, HLSL_CLASS_TECHNIQUE); - type->e.version = technique_types[i].version; - hlsl_scope_add_type(ctx->globals, type); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 5e3010c4353..1e5f0805152 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -79,8 +79,13 @@ enum hlsl_type_class - HLSL_CLASS_STRUCT, - HLSL_CLASS_ARRAY, - HLSL_CLASS_OBJECT, -+ HLSL_CLASS_DEPTH_STENCIL_VIEW, -+ HLSL_CLASS_EFFECT_GROUP, -+ HLSL_CLASS_PASS, -+ HLSL_CLASS_RENDER_TARGET_VIEW, - HLSL_CLASS_SAMPLER, - HLSL_CLASS_STRING, -+ HLSL_CLASS_TECHNIQUE, - HLSL_CLASS_TEXTURE, - HLSL_CLASS_UAV, - HLSL_CLASS_VOID, -@@ -97,11 +102,6 @@ enum hlsl_base_type - HLSL_TYPE_LAST_SCALAR = HLSL_TYPE_BOOL, - HLSL_TYPE_PIXELSHADER, - HLSL_TYPE_VERTEXSHADER, -- HLSL_TYPE_PASS, -- HLSL_TYPE_RENDERTARGETVIEW, -- HLSL_TYPE_DEPTHSTENCILVIEW, -- HLSL_TYPE_TECHNIQUE, -- HLSL_TYPE_EFFECT_GROUP, - }; - - enum hlsl_sampler_dim -@@ -145,8 +145,6 @@ struct hlsl_type - enum hlsl_type_class class; - /* If class is <= HLSL_CLASS_LAST_NUMERIC, then base_type is <= HLSL_TYPE_LAST_SCALAR. 
- * If class is HLSL_CLASS_OBJECT, then base_type is > HLSL_TYPE_LAST_SCALAR. -- * If class is HLSL_CLASS_OBJECT and base_type is HLSL_TYPE_TECHNIQUE, additional version -- * field is used to distinguish between technique types. - * Otherwise, base_type is not used. */ - enum hlsl_base_type base_type; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index f99f322d8d1..0eed15c5a91 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -656,6 +656,16 @@ static unsigned int initializer_size(const struct parse_initializer *initializer - return count; - } - -+static void cleanup_parse_attribute_list(struct parse_attribute_list *attr_list) -+{ -+ unsigned int i = 0; -+ -+ assert(attr_list); -+ for (i = 0; i < attr_list->count; ++i) -+ hlsl_free_attribute((struct hlsl_attribute *) attr_list->attrs[i]); -+ vkd3d_free(attr_list->attrs); -+} -+ - static void free_parse_initializer(struct parse_initializer *initializer) - { - destroy_block(initializer->instrs); -@@ -6033,11 +6043,7 @@ attribute_list: - $$ = $1; - if (!(new_array = vkd3d_realloc($$.attrs, ($$.count + 1) * sizeof(*$$.attrs)))) - { -- unsigned int i; -- -- for (i = 0; i < $$.count; ++i) -- hlsl_free_attribute((void *)$$.attrs[i]); -- vkd3d_free($$.attrs); -+ cleanup_parse_attribute_list(&$$); - YYABORT; - } - $$.attrs = new_array; -@@ -6243,11 +6249,7 @@ func_prototype: - } - else - { -- unsigned int i; -- -- for (i = 0; i < $1.count; ++i) -- hlsl_free_attribute((void *)$1.attrs[i]); -- vkd3d_free($1.attrs); -+ cleanup_parse_attribute_list(&$1); - } - $$ = $2; - } -@@ -7358,6 +7360,7 @@ selection_statement: - { - destroy_block($6.then_block); - destroy_block($6.else_block); -+ cleanup_parse_attribute_list(&$1); - YYABORT; - } - -@@ -7365,10 +7368,12 @@ selection_statement: - { - destroy_block($6.then_block); - destroy_block($6.else_block); -+ cleanup_parse_attribute_list(&$1); - YYABORT; - } - destroy_block($6.then_block); - 
destroy_block($6.else_block); -+ cleanup_parse_attribute_list(&$1); - - $$ = $4; - hlsl_block_add_instr($$, instr); -@@ -7391,21 +7396,25 @@ loop_statement: - { - $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); - hlsl_pop_scope(ctx); -+ cleanup_parse_attribute_list(&$1); - } - | attribute_list_optional loop_scope_start KW_DO statement KW_WHILE '(' expr ')' ';' - { - $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); - hlsl_pop_scope(ctx); -+ cleanup_parse_attribute_list(&$1); - } - | attribute_list_optional loop_scope_start KW_FOR '(' expr_statement expr_statement expr_optional ')' statement - { - $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); - hlsl_pop_scope(ctx); -+ cleanup_parse_attribute_list(&$1); - } - | attribute_list_optional loop_scope_start KW_FOR '(' declaration expr_statement expr_optional ')' statement - { - $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); - hlsl_pop_scope(ctx); -+ cleanup_parse_attribute_list(&$1); - } - - switch_statement: -@@ -7418,6 +7427,7 @@ switch_statement: - { - destroy_switch_cases($8); - destroy_block($5); -+ cleanup_parse_attribute_list(&$1); - YYABORT; - } - -@@ -7428,6 +7438,7 @@ switch_statement: - if (!s) - { - destroy_block($5); -+ cleanup_parse_attribute_list(&$1); - YYABORT; - } - -@@ -7435,6 +7446,7 @@ switch_statement: - hlsl_block_add_instr($$, s); - - hlsl_pop_scope(ctx); -+ cleanup_parse_attribute_list(&$1); - } - - switch_case: -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index f6cccfe8bea..a6d6b336b40 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -1638,7 +1638,12 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - * matrices yet. 
*/ - return false; - -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: - vkd3d_unreachable(); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 1f8d60c62ac..59b74c065d8 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -2917,8 +2917,7 @@ static enum vkd3d_result vsir_block_init(struct vsir_block *block, unsigned int - if (block_count > SIZE_MAX - (sizeof(*block->dominates) * CHAR_BIT - 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; - -- block_count = align(block_count, sizeof(*block->dominates) * CHAR_BIT); -- byte_count = block_count / CHAR_BIT; -+ byte_count = VKD3D_BITMAP_SIZE(block_count) * sizeof(*block->dominates); - - assert(label); - memset(block, 0, sizeof(*block)); -@@ -3098,6 +3097,8 @@ struct vsir_cfg - { - struct vkd3d_shader_message_context *message_context; - struct vsir_program *program; -+ size_t function_begin; -+ size_t function_end; - struct vsir_block *blocks; - struct vsir_block *entry; - size_t block_count; -@@ -3342,25 +3343,19 @@ static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg) - } - - static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) -+ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target, -+ size_t *pos) - { - struct vsir_block *current_block = NULL; - enum vkd3d_result ret; - size_t i; - -- if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) -- { -- FIXME("Hull shaders are not supported.\n"); -- vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -- "The structurizer does not support hull shaders."); -- return VKD3D_ERROR_INVALID_SHADER; -- } -- - memset(cfg, 0, 
sizeof(*cfg)); - cfg->message_context = message_context; - cfg->program = program; - cfg->block_count = program->block_count; - cfg->target = target; -+ cfg->function_begin = *pos; - - vsir_block_list_init(&cfg->order); - -@@ -3370,9 +3365,10 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - if (TRACE_ON()) - vkd3d_string_buffer_init(&cfg->debug_buffer); - -- for (i = 0; i < program->instructions.count; ++i) -+ for (i = *pos; i < program->instructions.count; ++i) - { - struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; -+ bool finish = false; - - switch (instruction->handler_idx) - { -@@ -3404,11 +3400,24 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - current_block = NULL; - break; - -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ assert(!current_block); -+ finish = true; -+ break; -+ - default: - break; - } -+ -+ if (finish) -+ break; - } - -+ *pos = i; -+ cfg->function_end = *pos; -+ - for (i = 0; i < cfg->block_count; ++i) - { - struct vsir_block *block = &cfg->blocks[i]; -@@ -4881,12 +4890,13 @@ static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) - } - - static enum vkd3d_result vsir_program_structurize_function(struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target) -+ struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target, -+ size_t *pos) - { - enum vkd3d_result ret; - struct vsir_cfg cfg; - -- if ((ret = vsir_cfg_init(&cfg, program, message_context, target)) < 0) -+ if ((ret = vsir_cfg_init(&cfg, program, message_context, target, pos)) < 0) - return ret; - - vsir_cfg_compute_dominators(&cfg); -@@ -4919,7 +4929,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, - { - struct vsir_cfg_emit_target target = {0}; - enum vkd3d_result ret; 
-- unsigned int i; -+ size_t i; - - target.jump_target_temp_idx = program->temp_count; - target.temp_count = program->temp_count + 1; -@@ -4927,19 +4937,41 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, - if (!reserve_instructions(&target.instructions, &target.ins_capacity, program->instructions.count)) - return VKD3D_ERROR_OUT_OF_MEMORY; - -- /* Copy declarations until the first block. */ -- for (i = 0; i < program->instructions.count; ++i) -+ for (i = 0; i < program->instructions.count;) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- if (ins->handler_idx == VKD3DSIH_LABEL) -- break; -+ switch (ins->handler_idx) -+ { -+ case VKD3DSIH_LABEL: -+ assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -+ TRACE("Structurizing a non-hull shader.\n"); -+ if ((ret = vsir_program_structurize_function(program, message_context, -+ &target, &i)) < 0) -+ goto fail; -+ assert(i == program->instructions.count); -+ break; - -- target.instructions[target.ins_count++] = *ins; -- } -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -+ TRACE("Structurizing phase %u of a hull shader.\n", ins->handler_idx); -+ target.instructions[target.ins_count++] = *ins; -+ ++i; -+ if ((ret = vsir_program_structurize_function(program, message_context, -+ &target, &i)) < 0) -+ goto fail; -+ break; - -- if ((ret = vsir_program_structurize_function(program, message_context, &target)) < 0) -- goto fail; -+ default: -+ if (!reserve_instructions(&target.instructions, &target.ins_capacity, target.ins_count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ target.instructions[target.ins_count++] = *ins; -+ ++i; -+ break; -+ } -+ } - - vkd3d_free(program->instructions.elements); - program->instructions.elements = target.instructions; -@@ -5001,6 +5033,9 @@ static enum vkd3d_result 
vsir_cfg_materialize_undominated_ssas_to_temps(struct v - struct vsir_block *block = &cfg->blocks[i]; - struct vkd3d_shader_instruction *ins; - -+ if (block->label == 0) -+ continue; -+ - for (ins = block->begin; ins <= block->end; ++ins) - { - for (j = 0; j < ins->dst_count; ++j) -@@ -5016,6 +5051,9 @@ static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct v - struct vsir_block *block = &cfg->blocks[i]; - struct vkd3d_shader_instruction *ins; - -+ if (block->label == 0) -+ continue; -+ - for (ins = block->begin; ins <= block->end; ++ins) - { - for (j = 0; j < ins->src_count; ++j) -@@ -5028,7 +5066,7 @@ static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct v - - TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count); - -- for (i = 0; i < program->instructions.count; ++i) -+ for (i = cfg->function_begin; i < cfg->function_end; ++i) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -@@ -5047,13 +5085,14 @@ done: - return VKD3D_OK; - } - --static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, -- struct vkd3d_shader_message_context *message_context) -+static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_function( -+ struct vsir_program *program, struct vkd3d_shader_message_context *message_context, -+ size_t *pos) - { - enum vkd3d_result ret; - struct vsir_cfg cfg; - -- if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL)) < 0) -+ if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL, pos)) < 0) - return ret; - - vsir_cfg_compute_dominators(&cfg); -@@ -5065,6 +5104,47 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru - return ret; - } - -+static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ enum 
vkd3d_result ret; -+ size_t i; -+ -+ for (i = 0; i < program->instructions.count;) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ -+ switch (ins->handler_idx) -+ { -+ case VKD3DSIH_LABEL: -+ assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -+ TRACE("Materializing undominated SSAs in a non-hull shader.\n"); -+ if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( -+ program, message_context, &i)) < 0) -+ return ret; -+ assert(i == program->instructions.count); -+ break; -+ -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -+ TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->handler_idx); -+ ++i; -+ if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( -+ program, message_context, &i)) < 0) -+ return ret; -+ break; -+ -+ default: -+ ++i; -+ break; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ - struct validation_context - { - struct vkd3d_shader_message_context *message_context; -@@ -5890,7 +5970,8 @@ static void vsir_validate_instruction(struct validation_context *ctx) - unsigned int value_idx = 2 * i; - unsigned int label_idx = 2 * i + 1; - -- if (!register_is_constant(&instruction->src[value_idx].reg) && !register_is_ssa(&instruction->src[value_idx].reg)) -+ if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) -+ && !register_is_ssa(&instruction->src[value_idx].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid value register for incoming %zu of type %#x in PHI instruction, " - "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 6ee06c02d74..708ab6268a7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -3006,10 
+3006,15 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - return D3D_SVC_VECTOR; - - case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_STRUCT: - case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: --- -2.43.0 - diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-13e1491941a1af32ddfc1019fa304231fd1.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-13e1491941a1af32ddfc1019fa304231fd1.patch deleted file mode 100644 index f80ec902..00000000 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-13e1491941a1af32ddfc1019fa304231fd1.patch +++ /dev/null @@ -1,953 +0,0 @@ -From e91e957dbee71d7729e7e6fe7aa6c04bf13c360b Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Sun, 28 Apr 2024 09:46:44 +1000 -Subject: [PATCH] Updated vkd3d to 13e1491941a1af32ddfc1019fa304231fd121c4d. 
- ---- - libs/vkd3d/include/private/vkd3d_common.h | 63 +++++++ - libs/vkd3d/include/vkd3d_types.h | 6 + - libs/vkd3d/libs/vkd3d-common/blob.c | 3 + - libs/vkd3d/libs/vkd3d-common/error.c | 6 + - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 61 ++++++- - libs/vkd3d/libs/vkd3d-shader/ir.c | 122 +++++++++++++- - libs/vkd3d/libs/vkd3d/cache.c | 195 ++++++++++++++++++++++ - libs/vkd3d/libs/vkd3d/device.c | 36 +++- - libs/vkd3d/libs/vkd3d/resource.c | 24 +-- - libs/vkd3d/libs/vkd3d/vkd3d_main.c | 2 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 92 ++-------- - 11 files changed, 497 insertions(+), 113 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index b0e9230dab6..2d950b4f7aa 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -340,6 +340,11 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) - return (x > y) - (x < y); - } - -+static inline int vkd3d_u64_compare(uint64_t x, uint64_t y) -+{ -+ return (x > y) - (x < y); -+} -+ - #define VKD3D_BITMAP_SIZE(x) (((x) + 0x1f) >> 5) - - static inline bool bitmap_clear(uint32_t *map, unsigned int idx) -@@ -431,6 +436,64 @@ static inline uint32_t vkd3d_atomic_increment_u32(uint32_t volatile *x) - return vkd3d_atomic_add_fetch_u32(x, 1); - } - -+static inline bool vkd3d_atomic_compare_exchange_u32(uint32_t volatile *x, uint32_t expected, uint32_t val) -+{ -+#if HAVE_SYNC_BOOL_COMPARE_AND_SWAP -+ return __sync_bool_compare_and_swap(x, expected, val); -+#elif defined(_WIN32) -+ return InterlockedCompareExchange((LONG *)x, val, expected) == expected; -+#else -+# error "vkd3d_atomic_compare_exchange_u32() not implemented for this platform" -+#endif -+} -+ -+static inline bool vkd3d_atomic_compare_exchange_ptr(void * volatile *x, void *expected, void *val) -+{ -+#if HAVE_SYNC_BOOL_COMPARE_AND_SWAP -+ return __sync_bool_compare_and_swap(x, expected, val); -+#elif defined(_WIN32) -+ return 
InterlockedCompareExchangePointer(x, val, expected) == expected; -+#else -+# error "vkd3d_atomic_compare_exchange_ptr() not implemented for this platform" -+#endif -+} -+ -+static inline uint32_t vkd3d_atomic_exchange_u32(uint32_t volatile *x, uint32_t val) -+{ -+#if HAVE_ATOMIC_EXCHANGE_N -+ return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); -+#elif defined(_WIN32) -+ return InterlockedExchange((LONG *)x, val); -+#else -+ uint32_t expected; -+ -+ do -+ { -+ expected = *x; -+ } while (!vkd3d_atomic_compare_exchange_u32(x, expected, val)); -+ -+ return expected; -+#endif -+} -+ -+static inline void *vkd3d_atomic_exchange_ptr(void * volatile *x, void *val) -+{ -+#if HAVE_ATOMIC_EXCHANGE_N -+ return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); -+#elif defined(_WIN32) -+ return InterlockedExchangePointer(x, val); -+#else -+ void *expected; -+ -+ do -+ { -+ expected = *x; -+ } while (!vkd3d_atomic_compare_exchange_ptr(x, expected, val)); -+ -+ return expected; -+#endif -+} -+ - struct vkd3d_mutex - { - #ifdef _WIN32 -diff --git a/libs/vkd3d/include/vkd3d_types.h b/libs/vkd3d/include/vkd3d_types.h -index 017eaf11806..dc5a7c064ae 100644 ---- a/libs/vkd3d/include/vkd3d_types.h -+++ b/libs/vkd3d/include/vkd3d_types.h -@@ -53,6 +53,12 @@ enum vkd3d_result - VKD3D_ERROR_INVALID_SHADER = -4, - /** The operation is not implemented in this version of vkd3d. */ - VKD3D_ERROR_NOT_IMPLEMENTED = -5, -+ /** The object or entry already exists. \since 1.12 */ -+ VKD3D_ERROR_KEY_ALREADY_EXISTS = -6, -+ /** The requested object was not found. \since 1.12 */ -+ VKD3D_ERROR_NOT_FOUND = -7, -+ /** The output buffer is larger than the requested object \since 1.12. 
*/ -+ VKD3D_ERROR_MORE_DATA = -8, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_RESULT), - }; -diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c -index 6bc95dc55c4..c2c6ad67804 100644 ---- a/libs/vkd3d/libs/vkd3d-common/blob.c -+++ b/libs/vkd3d/libs/vkd3d-common/blob.c -@@ -16,6 +16,9 @@ - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -+#ifndef __MINGW32__ -+#define WIDL_C_INLINE_WRAPPERS -+#endif - #define COBJMACROS - - #define CONST_VTABLE -diff --git a/libs/vkd3d/libs/vkd3d-common/error.c b/libs/vkd3d/libs/vkd3d-common/error.c -index b8350a5404c..2f978c4977d 100644 ---- a/libs/vkd3d/libs/vkd3d-common/error.c -+++ b/libs/vkd3d/libs/vkd3d-common/error.c -@@ -35,6 +35,12 @@ HRESULT hresult_from_vkd3d_result(int vkd3d_result) - return E_INVALIDARG; - case VKD3D_ERROR_NOT_IMPLEMENTED: - return E_NOTIMPL; -+ case VKD3D_ERROR_KEY_ALREADY_EXISTS: -+ return DXGI_ERROR_ALREADY_EXISTS; -+ case VKD3D_ERROR_NOT_FOUND: -+ return DXGI_ERROR_NOT_FOUND; -+ case VKD3D_ERROR_MORE_DATA: -+ return DXGI_ERROR_MORE_DATA; - default: - FIXME("Unhandled vkd3d result %d.\n", vkd3d_result); - return E_FAIL; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 55d9ecf707d..09e4f596241 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1813,6 +1813,7 @@ static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) - struct sm1_instruction - { - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; -+ unsigned int flags; - - struct sm1_dst_register - { -@@ -1852,6 +1853,8 @@ static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu - uint32_t token = instr->opcode; - unsigned int i; - -+ token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); -+ - if (ctx->profile->major_version > 1) - token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, 
token); -@@ -2414,6 +2417,49 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - } - -+static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_block *block); -+ -+static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_ir_if *iff = hlsl_ir_if(instr); -+ const struct hlsl_ir_node *condition; -+ struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; -+ -+ condition = iff->condition.node; -+ assert(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); -+ -+ sm1_ifc = (struct sm1_instruction) -+ { -+ .opcode = D3DSIO_IFC, -+ .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. */ -+ -+ .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), -+ .srcs[0].reg = condition->reg.id, -+ .srcs[0].mod = 0, -+ -+ .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), -+ .srcs[1].reg = condition->reg.id, -+ .srcs[1].mod = D3DSPSM_NEG, -+ -+ .src_count = 2, -+ }; -+ write_sm1_instruction(ctx, buffer, &sm1_ifc); -+ write_sm1_block(ctx, buffer, &iff->then_block); -+ -+ if (!list_empty(&iff->else_block.instrs)) -+ { -+ sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; -+ write_sm1_instruction(ctx, buffer, &sm1_else); -+ write_sm1_block(ctx, buffer, &iff->else_block); -+ } -+ -+ sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; -+ write_sm1_instruction(ctx, buffer, &sm1_endif); -+} -+ - static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -@@ -2614,12 +2660,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - write_sm1_instruction(ctx, buffer, &sm1_instr); - } - --static void 
write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_function_decl *entry_func) -+static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_block *block) - { - const struct hlsl_ir_node *instr; - -- LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { -@@ -2643,6 +2689,13 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - write_sm1_expr(ctx, buffer, instr); - break; - -+ case HLSL_IR_IF: -+ if (hlsl_version_ge(ctx, 2, 1)) -+ write_sm1_if(ctx, buffer, instr); -+ else -+ hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); -+ break; -+ - case HLSL_IR_JUMP: - write_sm1_jump(ctx, buffer, instr); - break; -@@ -2680,7 +2733,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun - write_sm1_constant_defs(ctx, &buffer); - write_sm1_semantic_dcls(ctx, &buffer); - write_sm1_sampler_dcls(ctx, &buffer); -- write_sm1_instructions(ctx, &buffer, entry_func); -+ write_sm1_block(ctx, &buffer, &entry_func->body); - - put_u32(&buffer, D3DSIO_END); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 59b74c065d8..121b0fe3a6c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -82,6 +82,106 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, - return true; - } - -+static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_data_type data_type, -+ enum vkd3d_shader_opcode *opcode, bool *requires_swap) -+{ -+ switch (rel_op) -+ { -+ case VKD3D_SHADER_REL_OP_LT: -+ case VKD3D_SHADER_REL_OP_GT: -+ *requires_swap = (rel_op == VKD3D_SHADER_REL_OP_GT); -+ if (data_type == VKD3D_DATA_FLOAT) -+ { -+ *opcode = VKD3DSIH_LTO; -+ return true; -+ } -+ break; 
-+ -+ case VKD3D_SHADER_REL_OP_GE: -+ case VKD3D_SHADER_REL_OP_LE: -+ *requires_swap = (rel_op == VKD3D_SHADER_REL_OP_LE); -+ if (data_type == VKD3D_DATA_FLOAT) -+ { -+ *opcode = VKD3DSIH_GEO; -+ return true; -+ } -+ break; -+ -+ case VKD3D_SHADER_REL_OP_EQ: -+ *requires_swap = false; -+ if (data_type == VKD3D_DATA_FLOAT) -+ { -+ *opcode = VKD3DSIH_EQO; -+ return true; -+ } -+ break; -+ -+ case VKD3D_SHADER_REL_OP_NE: -+ *requires_swap = false; -+ if (data_type == VKD3D_DATA_FLOAT) -+ { -+ *opcode = VKD3DSIH_NEO; -+ return true; -+ } -+ break; -+ } -+ return false; -+} -+ -+static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, -+ struct vkd3d_shader_instruction *ifc, unsigned int *tmp_idx, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ size_t pos = ifc - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ enum vkd3d_shader_opcode opcode; -+ bool swap; -+ -+ if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ if (*tmp_idx == ~0u) -+ *tmp_idx = program->temp_count++; -+ -+ /* Replace ifc comparison with actual comparison, saving the result in the tmp register. 
*/ -+ if (!(get_opcode_from_rel_op(ifc->flags, ifc->src[0].reg.data_type, &opcode, &swap))) -+ { -+ vkd3d_shader_error(message_context, &ifc->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Aborting due to not yet implemented feature: opcode for rel_op %u and data type %u.", -+ ifc->flags, ifc->src[0].reg.data_type); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ -+ ins = &instructions->elements[pos + 1]; -+ if (!vsir_instruction_init_with_params(program, ins, &ifc->location, opcode, 1, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].reg.idx[0].offset = *tmp_idx; -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; -+ -+ ins->src[0] = ifc->src[swap]; -+ ins->src[1] = ifc->src[!swap]; -+ -+ /* Create new if instruction using the previous result. */ -+ ins = &instructions->elements[pos + 2]; -+ if (!vsir_instruction_init_with_params(program, ins, &ifc->location, VKD3DSIH_IF, 0, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; -+ -+ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].reg.idx[0].offset = *tmp_idx; -+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ -+ /* Make the original instruction no-op */ -+ vkd3d_shader_instruction_make_nop(ifc); -+ -+ return VKD3D_OK; -+} -+ - static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program, - struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx) - { -@@ -210,7 +310,8 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro - return VKD3D_OK; - } - --static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program) -+static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) - { - 
struct vkd3d_shader_instruction_array *instructions = &program->instructions; - unsigned int tmp_idx = ~0u, i; -@@ -222,6 +323,11 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - - switch (ins->handler_idx) - { -+ case VKD3DSIH_IFC: -+ if ((ret = vsir_program_lower_ifc(program, ins, &tmp_idx, message_context)) < 0) -+ return ret; -+ break; -+ - case VKD3DSIH_TEXKILL: - if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0) - return ret; -@@ -4992,12 +5098,12 @@ static void register_map_undominated_use(struct vkd3d_shader_register *reg, stru - { - unsigned int i; - -- if (!register_is_ssa(reg)) -- return; -- -- i = reg->idx[0].offset; -- if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) -- alloc->table[i] = alloc->next_temp_idx++; -+ if (register_is_ssa(reg)) -+ { -+ i = reg->idx[0].offset; -+ if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) -+ alloc->table[i] = alloc->next_temp_idx++; -+ } - - for (i = 0; i < reg->idx_count; ++i) - if (reg->idx[i].rel_addr) -@@ -6056,7 +6162,7 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t - { - enum vkd3d_result result = VKD3D_OK; - -- if ((result = vsir_program_lower_instructions(program)) < 0) -+ if ((result = vsir_program_lower_instructions(program, message_context)) < 0) - return result; - - if (program->shader_version.major >= 6) -diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c -index 56ba6990420..a0a29ed30cb 100644 ---- a/libs/vkd3d/libs/vkd3d/cache.c -+++ b/libs/vkd3d/libs/vkd3d/cache.c -@@ -18,11 +18,60 @@ - - #include "vkd3d_private.h" - -+struct vkd3d_cache_entry_header -+{ -+ uint64_t hash; -+ uint64_t key_size; -+ uint64_t value_size; -+}; -+ - struct vkd3d_shader_cache - { - unsigned int refcount; -+ struct vkd3d_mutex lock; -+ -+ struct rb_tree tree; - }; - -+struct shader_cache_entry -+{ -+ struct vkd3d_cache_entry_header h; -+ struct 
rb_entry entry; -+ uint8_t *payload; -+}; -+ -+struct shader_cache_key -+{ -+ uint64_t hash; -+ const void *key; -+ uint64_t key_size; -+}; -+ -+static int vkd3d_shader_cache_compare_key(const void *key, const struct rb_entry *entry) -+{ -+ const struct shader_cache_entry *e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); -+ const struct shader_cache_key *k = key; -+ int ret; -+ -+ if ((ret = vkd3d_u64_compare(k->hash, e->h.hash))) -+ return ret; -+ if ((ret = vkd3d_u64_compare(k->key_size, e->h.key_size))) -+ return ret; -+ -+ /* Until now we have not seen an actual hash collision. If the key didn't match it was always -+ * due to a bug in the serialization code or memory corruption. If you see this FIXME please -+ * investigate. */ -+ if ((ret = memcmp(k->key, e->payload, k->key_size))) -+ FIXME("Actual case of a hash collision found.\n"); -+ return ret; -+} -+ -+static void vkd3d_shader_cache_add_entry(struct vkd3d_shader_cache *cache, -+ struct shader_cache_entry *e) -+{ -+ rb_put(&cache->tree, &e->h.hash, &e->entry); -+} -+ - int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) - { - struct vkd3d_shader_cache *object; -@@ -34,6 +83,9 @@ int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) - return VKD3D_ERROR_OUT_OF_MEMORY; - - object->refcount = 1; -+ rb_init(&object->tree, vkd3d_shader_cache_compare_key); -+ vkd3d_mutex_init(&object->lock); -+ - *cache = object; - - return VKD3D_OK; -@@ -46,6 +98,13 @@ unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache) - return refcount; - } - -+static void vkd3d_shader_cache_destroy_entry(struct rb_entry *entry, void *context) -+{ -+ struct shader_cache_entry *e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); -+ vkd3d_free(e->payload); -+ vkd3d_free(e); -+} -+ - unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) - { - unsigned int refcount = vkd3d_atomic_decrement_u32(&cache->refcount); -@@ -54,6 +113,142 @@ unsigned int 
vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) - if (refcount) - return refcount; - -+ rb_destroy(&cache->tree, vkd3d_shader_cache_destroy_entry, NULL); -+ vkd3d_mutex_destroy(&cache->lock); -+ - vkd3d_free(cache); - return 0; - } -+ -+static uint64_t vkd3d_shader_cache_hash_key(const void *key, size_t size) -+{ -+ static const uint64_t fnv_prime = 0x00000100000001b3; -+ uint64_t hash = 0xcbf29ce484222325; -+ const uint8_t *k = key; -+ size_t i; -+ -+ for (i = 0; i < size; ++i) -+ hash = (hash ^ k[i]) * fnv_prime; -+ -+ return hash; -+} -+ -+static void vkd3d_shader_cache_lock(struct vkd3d_shader_cache *cache) -+{ -+ vkd3d_mutex_lock(&cache->lock); -+} -+ -+static void vkd3d_shader_cache_unlock(struct vkd3d_shader_cache *cache) -+{ -+ vkd3d_mutex_unlock(&cache->lock); -+} -+ -+int vkd3d_shader_cache_put(struct vkd3d_shader_cache *cache, -+ const void *key, size_t key_size, const void *value, size_t value_size) -+{ -+ struct shader_cache_entry *e; -+ struct shader_cache_key k; -+ struct rb_entry *entry; -+ enum vkd3d_result ret; -+ -+ TRACE("%p, %p, %#zx, %p, %#zx.\n", cache, key, key_size, value, value_size); -+ -+ k.hash = vkd3d_shader_cache_hash_key(key, key_size); -+ k.key = key; -+ k.key_size = key_size; -+ -+ vkd3d_shader_cache_lock(cache); -+ -+ entry = rb_get(&cache->tree, &k); -+ e = entry ? 
RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry) : NULL; -+ -+ if (e) -+ { -+ WARN("Key already exists, returning VKD3D_ERROR_KEY_ALREADY_EXISTS.\n"); -+ ret = VKD3D_ERROR_KEY_ALREADY_EXISTS; -+ goto done; -+ } -+ -+ e = vkd3d_malloc(sizeof(*e)); -+ if (!e) -+ { -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto done; -+ } -+ e->payload = vkd3d_malloc(key_size + value_size); -+ if (!e->payload) -+ { -+ vkd3d_free(e); -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto done; -+ } -+ -+ e->h.key_size = key_size; -+ e->h.value_size = value_size; -+ e->h.hash = k.hash; -+ memcpy(e->payload, key, key_size); -+ memcpy(e->payload + key_size, value, value_size); -+ -+ vkd3d_shader_cache_add_entry(cache, e); -+ TRACE("Cache entry %#"PRIx64" stored.\n", k.hash); -+ ret = VKD3D_OK; -+ -+done: -+ vkd3d_shader_cache_unlock(cache); -+ return ret; -+} -+ -+int vkd3d_shader_cache_get(struct vkd3d_shader_cache *cache, -+ const void *key, size_t key_size, void *value, size_t *value_size) -+{ -+ struct shader_cache_entry *e; -+ struct shader_cache_key k; -+ struct rb_entry *entry; -+ enum vkd3d_result ret; -+ size_t size_in; -+ -+ TRACE("%p, %p, %#zx, %p, %p.\n", cache, key, key_size, value, value_size); -+ -+ size_in = *value_size; -+ -+ k.hash = vkd3d_shader_cache_hash_key(key, key_size); -+ k.key = key; -+ k.key_size = key_size; -+ -+ vkd3d_shader_cache_lock(cache); -+ -+ entry = rb_get(&cache->tree, &k); -+ if (!entry) -+ { -+ WARN("Entry not found.\n"); -+ ret = VKD3D_ERROR_NOT_FOUND; -+ goto done; -+ } -+ -+ e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); -+ -+ *value_size = e->h.value_size; -+ if (!value) -+ { -+ TRACE("Found item %#"PRIx64", returning needed size %#"PRIx64".\n", -+ e->h.hash, e->h.value_size); -+ ret = VKD3D_OK; -+ goto done; -+ } -+ -+ if (size_in < e->h.value_size) -+ { -+ WARN("Output buffer is too small for item %#"PRIx64", got %#zx want %#"PRIx64".\n", -+ e->h.hash, size_in, e->h.value_size); -+ ret = VKD3D_ERROR_MORE_DATA; -+ goto done; -+ } -+ -+ 
memcpy(value, e->payload + e->h.key_size, e->h.value_size); -+ ret = VKD3D_OK; -+ TRACE("Returning cached item %#"PRIx64".\n", e->h.hash); -+ -+done: -+ vkd3d_shader_cache_unlock(cache); -+ return ret; -+} -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index a394e3f7592..cb2b6ad0364 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -2685,19 +2685,43 @@ static HRESULT STDMETHODCALLTYPE d3d12_cache_session_GetDevice(ID3D12ShaderCache - static HRESULT STDMETHODCALLTYPE d3d12_cache_session_FindValue(ID3D12ShaderCacheSession *iface, - const void *key, UINT key_size, void *value, UINT *value_size) - { -- FIXME("iface %p, key %p, key_size %#x, value %p, value_size %p stub!\n", -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ enum vkd3d_result ret; -+ size_t size; -+ -+ TRACE("iface %p, key %p, key_size %#x, value %p, value_size %p.\n", - iface, key, key_size, value, value_size); - -- return DXGI_ERROR_NOT_FOUND; -+ if (!value_size) -+ { -+ WARN("value_size is NULL, returning E_INVALIDARG.\n"); -+ return E_INVALIDARG; -+ } -+ -+ size = *value_size; -+ ret = vkd3d_shader_cache_get(session->cache, key, key_size, value, &size); -+ *value_size = size; -+ -+ return hresult_from_vkd3d_result(ret); - } - - static HRESULT STDMETHODCALLTYPE d3d12_cache_session_StoreValue(ID3D12ShaderCacheSession *iface, - const void *key, UINT key_size, const void *value, UINT value_size) - { -- FIXME("iface %p, key %p, key_size %#x, value %p, value_size %u stub!\n", iface, key, key_size, -- value, value_size); -+ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); -+ enum vkd3d_result ret; - -- return E_NOTIMPL; -+ TRACE("iface %p, key %p, key_size %#x, value %p, value_size %u.\n", -+ iface, key, key_size, value, value_size); -+ -+ if (!key || !key_size || !value || !value_size) -+ { -+ WARN("Invalid input parameters, returning E_INVALIDARG.\n"); -+ return 
E_INVALIDARG; -+ } -+ -+ ret = vkd3d_shader_cache_put(session->cache, key, key_size, value, value_size); -+ return hresult_from_vkd3d_result(ret); - } - - static void STDMETHODCALLTYPE d3d12_cache_session_SetDeleteOnDestroy(ID3D12ShaderCacheSession *iface) -@@ -2833,7 +2857,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device9 *ifac - || IsEqualGUID(riid, &IID_ID3D12Object) - || IsEqualGUID(riid, &IID_IUnknown)) - { -- ID3D12Device_AddRef(iface); -+ ID3D12Device9_AddRef(iface); - *object = iface; - return S_OK; - } -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 179999148bc..7a2f464c98e 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -1271,7 +1271,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource2 * - || IsEqualGUID(riid, &IID_ID3D12Object) - || IsEqualGUID(riid, &IID_IUnknown)) - { -- ID3D12Resource_AddRef(iface); -+ ID3D12Resource2_AddRef(iface); - *object = iface; - return S_OK; - } -@@ -2350,16 +2350,16 @@ static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) - i = vkd3d_atomic_increment_u32(&cache->next_index) & HEAD_INDEX_MASK; - for (;;) - { -- if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) -+ if (vkd3d_atomic_compare_exchange_u32(&cache->heads[i].spinlock, 0, 1)) - { - if ((u.object = cache->heads[i].head)) - { - vkd3d_atomic_decrement_u32(&cache->free_count); - cache->heads[i].head = u.header->next; -- vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); - return u.object; - } -- vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); - } - /* Keeping a free count avoids uncertainty over when this loop should terminate, - * which could result in excess allocations gradually increasing without limit. 
*/ -@@ -2381,7 +2381,7 @@ static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, - i = vkd3d_atomic_increment_u32(&cache->next_index) & HEAD_INDEX_MASK; - for (;;) - { -- if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) -+ if (vkd3d_atomic_compare_exchange_u32(&cache->heads[i].spinlock, 0, 1)) - break; - i = (i + 1) & HEAD_INDEX_MASK; - } -@@ -2389,7 +2389,7 @@ static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, - head = cache->heads[i].head; - u.header->next = head; - cache->heads[i].head = u.object; -- vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); - vkd3d_atomic_increment_u32(&cache->free_count); - } - -@@ -2473,7 +2473,7 @@ void vkd3d_view_decref(void *view, struct d3d12_device *device) - - static inline void d3d12_desc_replace(struct d3d12_desc *dst, void *view, struct d3d12_device *device) - { -- if ((view = vkd3d_atomic_exchange_pointer(&dst->s.u.object, view))) -+ if ((view = vkd3d_atomic_exchange_ptr(&dst->s.u.object, view))) - vkd3d_view_decref(view, device); - } - -@@ -2652,7 +2652,7 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr - union d3d12_desc_object u; - unsigned int i, next; - -- if ((i = vkd3d_atomic_exchange(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) -+ if ((i = vkd3d_atomic_exchange_u32(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) - return; - - writes.null_vk_cbv_info.buffer = VK_NULL_HANDLE; -@@ -2667,7 +2667,7 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr - for (; i != UINT_MAX; i = next) - { - src = &descriptors[i]; -- next = vkd3d_atomic_exchange(&src->next, 0); -+ next = vkd3d_atomic_exchange_u32(&src->next, 0); - next = (int)next >> 1; - - /* A race exists here between updating src->next and getting the current object. 
The best -@@ -2695,13 +2695,13 @@ static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst, struct d3d12_des - head = descriptor_heap->dirty_list_head; - - /* Only one thread can swap the value away from zero. */ -- if (!vkd3d_atomic_compare_exchange(&dst->next, 0, (head << 1) | 1)) -+ if (!vkd3d_atomic_compare_exchange_u32(&dst->next, 0, (head << 1) | 1)) - return; - /* Now it is safe to modify 'next' to another nonzero value if necessary. */ -- while (!vkd3d_atomic_compare_exchange(&descriptor_heap->dirty_list_head, head, i)) -+ while (!vkd3d_atomic_compare_exchange_u32(&descriptor_heap->dirty_list_head, head, i)) - { - head = descriptor_heap->dirty_list_head; -- vkd3d_atomic_exchange(&dst->next, (head << 1) | 1); -+ vkd3d_atomic_exchange_u32(&dst->next, (head << 1) | 1); - } - } - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -index 29305fbdc63..c7431bd821b 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -@@ -71,7 +71,7 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, - - if (!device) - { -- ID3D12Device_Release(&object->ID3D12Device9_iface); -+ ID3D12Device9_Release(&object->ID3D12Device9_iface); - return S_FALSE; - } - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index 5f60c8d90ad..d1fa866d9e3 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -19,6 +19,9 @@ - #ifndef __VKD3D_PRIVATE_H - #define __VKD3D_PRIVATE_H - -+#ifndef __MINGW32__ -+#define WIDL_C_INLINE_WRAPPERS -+#endif - #define COBJMACROS - #define NONAMELESSUNION - #define VK_NO_PROTOTYPES -@@ -194,93 +197,14 @@ struct vkd3d_instance - unsigned int refcount; - }; - --#ifdef _WIN32 -- --union vkd3d_thread_handle --{ -- void *handle; --}; -- --static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) --{ -- return 
InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; --} -- --static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) --{ -- return InterlockedExchange((LONG volatile *)x, val); --} -- --static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) --{ -- return InterlockedCompareExchangePointer(x, xchg, cmp) == cmp; --} -- --static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) --{ -- return InterlockedExchangePointer(x, val); --} -- --#else /* _WIN32 */ -- --#include -- - union vkd3d_thread_handle - { -+#ifndef _WIN32 - pthread_t pthread; -+#endif - void *handle; - }; - --# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP --static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) --{ -- return __sync_bool_compare_and_swap(x, cmp, xchg); --} -- --static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) --{ -- return __sync_bool_compare_and_swap(x, cmp, xchg); --} --# else --# error "vkd3d_atomic_compare_exchange() not implemented for this platform" --# endif -- --# if HAVE_ATOMIC_EXCHANGE_N --static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) --{ -- return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); --} -- --static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) --{ -- return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); --} --# elif HAVE_SYNC_BOOL_COMPARE_AND_SWAP --static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) --{ -- unsigned int i; -- do -- { -- i = *x; -- } while (!__sync_bool_compare_and_swap(x, i, val)); -- return i; --} -- --static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) --{ -- void *p; -- do -- { -- p = *x; -- } while (!__sync_bool_compare_and_swap(x, p, val)); -- return p; --} --# else --# error 
"vkd3d_atomic_exchange() not implemented for this platform" --# endif -- --#endif /* _WIN32 */ -- - HRESULT vkd3d_create_thread(struct vkd3d_instance *instance, - PFN_vkd3d_thread thread_main, void *data, union vkd3d_thread_handle *thread); - HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_handle *thread); -@@ -742,7 +666,7 @@ static inline bool vkd3d_view_incref(void *desc) - if (refcount <= 0) - return false; - } -- while (!vkd3d_atomic_compare_exchange(&h->refcount, refcount, refcount + 1)); -+ while (!vkd3d_atomic_compare_exchange_u32(&h->refcount, refcount, refcount + 1)); - - return true; - } -@@ -1852,5 +1776,9 @@ struct vkd3d_shader_cache; - int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache); - unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache); - unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache); -+int vkd3d_shader_cache_put(struct vkd3d_shader_cache *cache, -+ const void *key, size_t key_size, const void *value, size_t value_size); -+int vkd3d_shader_cache_get(struct vkd3d_shader_cache *cache, -+ const void *key, size_t key_size, void *value, size_t *value_size); - - #endif /* __VKD3D_PRIVATE_H */ --- -2.43.0 - diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch deleted file mode 100644 index cc5b3afa..00000000 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch +++ /dev/null @@ -1,1497 +0,0 @@ -From a10a5f6d7f227464e7b594421cda94412460d7dc Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 3 May 2024 07:31:39 +1000 -Subject: [PATCH] Updated vkd3d to 62a512c4f8c4070f0f4f3ed8e70b6f0bc885da30. 
- ---- - libs/vkd3d/include/vkd3d.h | 91 +++++++++- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 11 ++ - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 22 +-- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 155 +++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 39 +++-- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1 + - .../libs/vkd3d-shader/hlsl_constant_ops.c | 130 +++++++++++++++ - libs/vkd3d/libs/vkd3d-shader/spirv.c | 113 +++++++++++-- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 44 ++--- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 31 ++-- - libs/vkd3d/libs/vkd3d/command.c | 21 +-- - libs/vkd3d/libs/vkd3d/device.c | 36 +++- - libs/vkd3d/libs/vkd3d/state.c | 2 + - 14 files changed, 595 insertions(+), 102 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h -index 71c56331d86..38249f0bf5c 100644 ---- a/libs/vkd3d/include/vkd3d.h -+++ b/libs/vkd3d/include/vkd3d.h -@@ -237,47 +237,134 @@ struct vkd3d_host_time_domain_info - uint64_t ticks_per_second; - }; - -+/** -+ * A chained structure containing device creation parameters. -+ */ - struct vkd3d_device_create_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** The minimum feature level to request. Device creation will fail with E_INVALIDARG if the -+ * Vulkan device doesn't have the features needed to fulfill the request. */ - D3D_FEATURE_LEVEL minimum_feature_level; - -+ /** -+ * The vkd3d instance to use to create a device. Either this or instance_create_info must be -+ * set. -+ */ - struct vkd3d_instance *instance; -+ /** -+ * The parameters used to create an instance, which is then used to create a device. Either -+ * this or instance must be set. -+ */ - const struct vkd3d_instance_create_info *instance_create_info; - -+ /** -+ * The Vulkan physical device to use. 
If it is NULL, the first physical device found is used, -+ * prioritizing discrete GPUs over integrated GPUs and integrated GPUs over all the others. -+ * -+ * This parameter can be overridden by setting environment variable VKD3D_VULKAN_DEVICE. -+ */ - VkPhysicalDevice vk_physical_device; - -+ /** -+ * A list of Vulkan device extensions to request. They are intended as required, so device -+ * creation will fail if any of them is not available. -+ */ - const char * const *device_extensions; -+ /** The number of elements in the device_extensions array. */ - uint32_t device_extension_count; - -+ /** -+ * An object to be set as the device parent. This is not used by vkd3d except for being -+ * returned by vkd3d_get_device_parent. -+ */ - IUnknown *parent; -+ /** -+ * The adapter LUID to be set for the device. This is not used by vkd3d except for being -+ * returned by GetAdapterLuid. -+ */ - LUID adapter_luid; - }; - --/* Extends vkd3d_device_create_info. Available since 1.2. */ -+/** -+ * A chained structure to specify optional device extensions. -+ * -+ * This structure extends vkd3d_device_create_info. -+ * -+ * \since 1.2 -+ */ - struct vkd3d_optional_device_extensions_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** -+ * A list of optional Vulkan device extensions to request. Device creation does not fail if -+ * they are not available. -+ */ - const char * const *extensions; -+ /** The number of elements in the extensions array. */ - uint32_t extension_count; - }; - --/* vkd3d_image_resource_create_info flags */ -+/** -+ * When specified as a flag of vkd3d_image_resource_create_info, it means that vkd3d will do the -+ * initial transition operation on the image from VK_IMAGE_LAYOUT_UNDEFINED to its appropriate -+ * Vulkan layout (depending on its D3D12 resource state). 
If this flag is not specified the caller -+ * is responsible for transitioning the Vulkan image to the appropriate layout. -+ */ - #define VKD3D_RESOURCE_INITIAL_STATE_TRANSITION 0x00000001 -+/** -+ * When specified as a flag of vkd3d_image_resource_create_info, it means that field present_state -+ * is honored. -+ */ - #define VKD3D_RESOURCE_PRESENT_STATE_TRANSITION 0x00000002 - -+/** -+ * A chained structure containing the parameters to create a D3D12 resource backed by a Vulkan -+ * image. -+ */ - struct vkd3d_image_resource_create_info - { -+ /** Must be set to VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO. */ - enum vkd3d_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ - const void *next; - -+ /** The Vulkan image that backs the resource. */ - VkImage vk_image; -+ /** The resource description. */ - D3D12_RESOURCE_DESC desc; -+ /** -+ * A combination of zero or more flags. The valid flags are -+ * VKD3D_RESOURCE_INITIAL_STATE_TRANSITION and VKD3D_RESOURCE_PRESENT_STATE_TRANSITION. -+ */ - unsigned int flags; -+ /** -+ * This field specifies how to handle resource state D3D12_RESOURCE_STATE_PRESENT for -+ * the resource. Notice that on D3D12 there is no difference between -+ * D3D12_RESOURCE_STATE_COMMON and D3D12_RESOURCE_STATE_PRESENT (they have the same value), -+ * while on Vulkan two different layouts are used (VK_IMAGE_LAYOUT_GENERAL and -+ * VK_IMAGE_LAYOUT_PRESENT_SRC_KHR). -+ * -+ * * When flag VKD3D_RESOURCE_PRESENT_STATE_TRANSITION is not specified, field -+ * present_state is ignored and resource state D3D12_RESOURCE_STATE_COMMON/_PRESENT is -+ * mapped to VK_IMAGE_LAYOUT_GENERAL; this is useful for non-swapchain resources. -+ * * Otherwise, when present_state is D3D12_RESOURCE_STATE_PRESENT/_COMMON, resource state -+ * D3D12_RESOURCE_STATE_COMMON/_PRESENT is mapped to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; -+ * this is useful for swapchain resources that are directly backed by a Vulkan swapchain -+ * image. 
-+ * * Otherwise, resource state D3D12_RESOURCE_STATE_COMMON/_PRESENT is treated as resource -+ * state present_state; this is useful for swapchain resources that backed by a Vulkan -+ * non-swapchain image, which the client will likely consume with a copy or drawing -+ * operation at presentation time. -+ */ - D3D12_RESOURCE_STATES present_state; - }; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index cd8ba0a7d2b..b2f329cd199 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -327,6 +327,9 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_UTOD ] = "utod", - [VKD3DSIH_UTOF ] = "utof", - [VKD3DSIH_UTOU ] = "utou", -+ [VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL ] = "wave_active_all_equal", -+ [VKD3DSIH_WAVE_ALL_TRUE ] = "wave_all_true", -+ [VKD3DSIH_WAVE_ANY_TRUE ] = "wave_any_true", - [VKD3DSIH_XOR ] = "xor", - }; - -@@ -1161,6 +1164,14 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - vkd3d_string_buffer_printf(buffer, "sr"); - break; - -+ case VKD3DSPR_WAVELANECOUNT: -+ vkd3d_string_buffer_printf(buffer, "vWaveLaneCount"); -+ break; -+ -+ case VKD3DSPR_WAVELANEINDEX: -+ vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); -+ break; -+ - default: - vkd3d_string_buffer_printf(buffer, "%s%s", - compiler->colours.error, reg->type, compiler->colours.reset); -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 09e4f596241..aa2358440e5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -215,8 +215,12 @@ struct vkd3d_shader_sm1_parser - - struct vkd3d_shader_parser p; - -+ struct -+ { - #define MAX_CONSTANT_COUNT 8192 -- uint32_t constant_def_mask[3][VKD3D_BITMAP_SIZE(MAX_CONSTANT_COUNT)]; -+ uint32_t def_mask[VKD3D_BITMAP_SIZE(MAX_CONSTANT_COUNT)]; -+ uint32_t count; -+ } constants[3]; - }; - - /* This table is not order or position 
dependent. */ -@@ -750,15 +754,13 @@ static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser * - static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, - enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) - { -- struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; -- -- desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); -+ sm1->constants[set].count = max(sm1->constants[set].count, index + 1); - if (from_def) - { - /* d3d shaders have a maximum of 8192 constants; we should not overrun - * this array. */ -- assert((index / 32) <= ARRAY_SIZE(sm1->constant_def_mask[set])); -- bitmap_set(sm1->constant_def_mask[set], index); -+ assert((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); -+ bitmap_set(sm1->constants[set].def_mask, index); - } - } - -@@ -1301,9 +1303,9 @@ static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, - /* Find the highest constant index which is not written by a DEF - * instruction. We can't (easily) use an FFZ function for this since it - * needs to be limited by the highest used register index. 
*/ -- for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) -+ for (j = sm1->constants[set].count; j > 0; --j) - { -- if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) -+ if (!bitmap_is_set(sm1->constants[set].def_mask, j - 1)) - return j; - } - -@@ -1354,8 +1356,8 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi - ++instructions->count; - } - -- for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) -- sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); -+ for (i = 0; i < ARRAY_SIZE(sm1->p.program.flat_constant_count); ++i) -+ sm1->p.program.flat_constant_count[i] = get_external_constant_count(sm1, i); - - if (!sm1->p.failed) - ret = vkd3d_shader_parser_validate(&sm1->p); -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 6a1fb6bddb7..e636ad917db 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -31,7 +31,7 @@ static const uint64_t GLOBALVAR_FLAG_EXPLICIT_TYPE = 2; - static const unsigned int GLOBALVAR_ADDRESS_SPACE_SHIFT = 2; - static const uint64_t ALLOCA_FLAG_IN_ALLOCA = 0x20; - static const uint64_t ALLOCA_FLAG_EXPLICIT_TYPE = 0x40; --static const uint64_t ALLOCA_ALIGNMENT_MASK = ALLOCA_FLAG_IN_ALLOCA - 1; -+static const uint64_t ALLOCA_ALIGNMENT_MASK = 0x1f; - static const unsigned int SHADER_DESCRIPTOR_TYPE_COUNT = 4; - static const size_t MAX_IR_INSTRUCTIONS_PER_DXIL_INSTRUCTION = 11; - -@@ -103,6 +103,7 @@ enum bitcode_constant_code - CST_CODE_INTEGER = 4, - CST_CODE_FLOAT = 6, - CST_CODE_STRING = 8, -+ CST_CODE_CE_CAST = 11, - CST_CODE_CE_GEP = 12, - CST_CODE_CE_INBOUNDS_GEP = 20, - CST_CODE_DATA = 22, -@@ -413,6 +414,7 @@ enum dx_intrinsic_opcode - DX_DERIV_COARSEY = 84, - DX_DERIV_FINEX = 85, - DX_DERIV_FINEY = 86, -+ DX_COVERAGE = 91, - DX_THREAD_ID = 93, - DX_GROUP_ID = 94, - DX_THREAD_ID_IN_GROUP = 95, -@@ -425,6 +427,11 @@ enum dx_intrinsic_opcode 
- DX_STORE_PATCH_CONSTANT = 106, - DX_OUTPUT_CONTROL_POINT_ID = 107, - DX_PRIMITIVE_ID = 108, -+ DX_WAVE_GET_LANE_INDEX = 111, -+ DX_WAVE_GET_LANE_COUNT = 112, -+ DX_WAVE_ANY_TRUE = 113, -+ DX_WAVE_ALL_TRUE = 114, -+ DX_WAVE_ACTIVE_ALL_EQUAL = 115, - DX_LEGACY_F32TOF16 = 130, - DX_LEGACY_F16TOF32 = 131, - DX_RAW_BUFFER_LOAD = 139, -@@ -606,6 +613,7 @@ struct sm6_value - enum sm6_value_type value_type; - unsigned int structure_stride; - bool is_undefined; -+ bool is_back_ref; - union - { - struct sm6_function_data function; -@@ -2216,6 +2224,11 @@ static bool sm6_value_is_ssa(const struct sm6_value *value) - return sm6_value_is_register(value) && register_is_ssa(&value->u.reg); - } - -+static bool sm6_value_is_numeric_array(const struct sm6_value *value) -+{ -+ return sm6_value_is_register(value) && register_is_numeric_array(&value->u.reg); -+} -+ - static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *value) - { - if (!sm6_value_is_constant(value)) -@@ -2658,6 +2671,18 @@ static bool sm6_value_validate_is_pointer(const struct sm6_value *value, struct - return true; - } - -+static bool sm6_value_validate_is_backward_ref(const struct sm6_value *value, struct sm6_parser *sm6) -+{ -+ if (!value->is_back_ref) -+ { -+ FIXME("Forward-referenced pointers are not supported.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Forward-referenced pointer declarations are not supported."); -+ return false; -+ } -+ return true; -+} -+ - static bool sm6_value_validate_is_numeric(const struct sm6_value *value, struct sm6_parser *sm6) - { - if (!sm6_type_is_numeric(value->type)) -@@ -3086,15 +3111,16 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c - static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) - { - enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; -- const struct sm6_type *type, *elem_type; -+ const struct 
sm6_type *type, *elem_type, *ptr_type; -+ size_t i, base_value_idx, value_idx; - enum vkd3d_data_type reg_data_type; - const struct dxil_record *record; -+ const struct sm6_value *src; - enum vkd3d_result ret; - struct sm6_value *dst; -- size_t i, value_idx; - uint64_t value; - -- for (i = 0, type = NULL; i < block->record_count; ++i) -+ for (i = 0, type = NULL, base_value_idx = sm6->value_count; i < block->record_count; ++i) - { - sm6->p.location.column = i; - record = block->records[i]; -@@ -3135,6 +3161,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - dst = sm6_parser_get_current_value(sm6); - dst->type = type; - dst->value_type = VALUE_TYPE_REG; -+ dst->is_back_ref = true; - vsir_register_init(&dst->u.reg, reg_type, reg_data_type, 0); - - switch (record->code) -@@ -3209,6 +3236,48 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - return ret; - break; - -+ case CST_CODE_CE_CAST: -+ if (!dxil_record_validate_operand_count(record, 3, 3, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if ((value = record->operands[0]) != CAST_BITCAST) -+ { -+ WARN("Unhandled constexpr cast op %"PRIu64".\n", value); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Constexpr cast op %"PRIu64" is unhandled.", value); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ ptr_type = sm6_parser_get_type(sm6, record->operands[1]); -+ if (!sm6_type_is_pointer(ptr_type)) -+ { -+ WARN("Constexpr cast at constant idx %zu is not a pointer.\n", value_idx); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Constexpr cast source operand is not a pointer."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if ((value = record->operands[2]) >= sm6->cur_max_value) -+ { -+ WARN("Invalid value index %"PRIu64".\n", value); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid value index %"PRIu64".", value); -+ return 
VKD3D_ERROR_INVALID_SHADER; -+ } -+ else if (value == value_idx) -+ { -+ WARN("Invalid value self-reference at %"PRIu64".\n", value); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid value self-reference for a constexpr cast."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ /* Resolve later in case forward refs exist. */ -+ dst->type = type; -+ dst->u.reg.type = VKD3DSPR_COUNT; -+ dst->u.reg.idx[0].offset = value; -+ break; -+ - case CST_CODE_UNDEF: - dxil_record_validate_operand_max_count(record, 0, sm6); - dst->u.reg.type = VKD3DSPR_UNDEF; -@@ -3234,6 +3303,29 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - ++sm6->value_count; - } - -+ /* Resolve cast forward refs. */ -+ for (i = base_value_idx; i < sm6->value_count; ++i) -+ { -+ dst = &sm6->values[i]; -+ if (dst->u.reg.type != VKD3DSPR_COUNT) -+ continue; -+ -+ type = dst->type; -+ -+ src = &sm6->values[dst->u.reg.idx[0].offset]; -+ if (!sm6_value_is_numeric_array(src)) -+ { -+ WARN("Value is not an array.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Constexpr cast source value is not a global array element."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ *dst = *src; -+ dst->type = type; -+ dst->u.reg.data_type = vkd3d_data_type_from_sm6_type(type->u.pointer.type); -+ } -+ - return VKD3D_OK; - } - -@@ -3462,6 +3554,7 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ - dst = sm6_parser_get_current_value(sm6); - dst->type = type; - dst->value_type = VALUE_TYPE_REG; -+ dst->is_back_ref = true; - - if (is_constant && !init) - { -@@ -3946,7 +4039,8 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ - uint64_t code; - - if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) -- || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) -+ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6) -+ || 
!sm6_value_validate_is_backward_ref(ptr, sm6)) - return; - - if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) -@@ -4421,6 +4515,12 @@ static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) - return VKD3DSIH_F32TOF16; - case DX_LEGACY_F16TOF32: - return VKD3DSIH_F16TOF32; -+ case DX_WAVE_ACTIVE_ALL_EQUAL: -+ return VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL; -+ case DX_WAVE_ALL_TRUE: -+ return VKD3DSIH_WAVE_ALL_TRUE; -+ case DX_WAVE_ANY_TRUE: -+ return VKD3DSIH_WAVE_ANY_TRUE; - default: - vkd3d_unreachable(); - } -@@ -4729,6 +4829,12 @@ static void sm6_parser_emit_dx_input_register_mov(struct sm6_parser *sm6, - instruction_dst_param_init_ssa_scalar(ins, sm6); - } - -+static void sm6_parser_emit_dx_coverage(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_COVERAGE, VKD3D_DATA_UINT); -+} -+ - static const struct sm6_descriptor_info *sm6_parser_get_descriptor(struct sm6_parser *sm6, - enum vkd3d_shader_descriptor_type type, unsigned int id, const struct sm6_value *address) - { -@@ -5804,6 +5910,26 @@ static void sm6_parser_emit_dx_texture_store(struct sm6_parser *sm6, enum dx_int - dst_param_init_with_mask(dst_param, write_mask); - } - -+static void sm6_parser_emit_dx_wave_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ enum vkd3d_shader_register_type type; -+ -+ switch (op) -+ { -+ case DX_WAVE_GET_LANE_COUNT: -+ type = VKD3DSPR_WAVELANECOUNT; -+ break; -+ case DX_WAVE_GET_LANE_INDEX: -+ type = VKD3DSPR_WAVELANEINDEX; -+ break; -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, type, VKD3D_DATA_UINT); -+} -+ - struct sm6_dx_opcode_info - { - const char *ret_type; -@@ -5820,6 +5946,7 @@ struct sm6_dx_opcode_info - C -> constant or undefined int8/16/32 - i -> int32 - 
m -> int16/32/64 -+ n -> any numeric - f -> float - d -> double - e -> half/float -@@ -5847,6 +5974,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, - [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, - [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, -+ [DX_COVERAGE ] = {"i", "", sm6_parser_emit_dx_coverage}, - [DX_CREATE_HANDLE ] = {"H", "ccib", sm6_parser_emit_dx_create_handle}, - [DX_DERIV_COARSEX ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DERIV_COARSEY ] = {"e", "R", sm6_parser_emit_dx_unary}, -@@ -5922,6 +6050,11 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_UMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, - [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, - [DX_UMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, -+ [DX_WAVE_ACTIVE_ALL_EQUAL ] = {"1", "n", sm6_parser_emit_dx_unary}, -+ [DX_WAVE_ALL_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, -+ [DX_WAVE_ANY_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, -+ [DX_WAVE_GET_LANE_COUNT ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, -+ [DX_WAVE_GET_LANE_INDEX ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, - }; - - static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_value *value, char info_type, -@@ -5953,6 +6086,8 @@ static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struc - return sm6_type_is_i32(type); - case 'm': - return sm6_type_is_i16_i32_i64(type); -+ case 'n': -+ return sm6_type_is_numeric(type); - case 'f': - return sm6_type_is_float(type); - case 'd': -@@ -6446,7 +6581,8 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re - uint64_t code; - - if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) -- || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) -+ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6) -+ || !sm6_value_validate_is_backward_ref(ptr, sm6)) - return; - - if 
(ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) -@@ -6687,6 +6823,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor - return; - if (!sm6_value_validate_is_register(ptr, sm6) - || !sm6_value_validate_is_pointer(ptr, sm6) -+ || !sm6_value_validate_is_backward_ref(ptr, sm6) - || !dxil_record_validate_operand_count(record, i + 2, i + 3, sm6)) - return; - -@@ -6870,7 +7007,8 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco - - if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) - || !sm6_value_validate_is_register(ptr, sm6) -- || !sm6_value_validate_is_pointer(ptr, sm6)) -+ || !sm6_value_validate_is_pointer(ptr, sm6) -+ || !sm6_value_validate_is_backward_ref(ptr, sm6)) - { - return; - } -@@ -7478,6 +7616,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - fwd_type = dst->type; - dst->type = NULL; - dst->value_type = VALUE_TYPE_REG; -+ dst->is_back_ref = true; - is_terminator = false; - - record = block->records[i]; -@@ -8081,7 +8220,9 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = - { - [SEMANTIC_KIND_ARBITRARY] = VKD3D_SHADER_SV_NONE, - [SEMANTIC_KIND_VERTEXID] = VKD3D_SHADER_SV_VERTEX_ID, -+ [SEMANTIC_KIND_INSTANCEID] = VKD3D_SHADER_SV_INSTANCE_ID, - [SEMANTIC_KIND_POSITION] = VKD3D_SHADER_SV_POSITION, -+ [SEMANTIC_KIND_PRIMITIVEID] = VKD3D_SHADER_SV_PRIMITIVE_ID, - [SEMANTIC_KIND_ISFRONTFACE] = VKD3D_SHADER_SV_IS_FRONT_FACE, - [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, - [SEMANTIC_KIND_DEPTH] = VKD3D_SHADER_SV_DEPTH, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 4fc1493bdce..0b48b17d21c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -3517,6 +3517,7 @@ static int compare_function_rb(const void *key, const struct rb_entry *entry) - - static void declare_predefined_types(struct hlsl_ctx *ctx) - { -+ struct vkd3d_string_buffer 
*name; - unsigned int x, y, bt, i, v; - struct hlsl_type *type; - -@@ -3529,7 +3530,6 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - "uint", - "bool", - }; -- char name[15]; - - static const char *const variants_float[] = {"min10float", "min16float"}; - static const char *const variants_int[] = {"min12int", "min16int"}; -@@ -3573,28 +3573,34 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - {"technique11", 11}, - }; - -+ if (!(name = hlsl_get_string_buffer(ctx))) -+ return; -+ - for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) - { - for (y = 1; y <= 4; ++y) - { - for (x = 1; x <= 4; ++x) - { -- sprintf(name, "%s%ux%u", names[bt], y, x); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_MATRIX, bt, x, y); -+ vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s%ux%u", names[bt], y, x); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_MATRIX, bt, x, y); - hlsl_scope_add_type(ctx->globals, type); - ctx->builtin_types.matrix[bt][x - 1][y - 1] = type; - - if (y == 1) - { -- sprintf(name, "%s%u", names[bt], x); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_VECTOR, bt, x, y); -+ vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s%u", names[bt], x); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_VECTOR, bt, x, y); - hlsl_scope_add_type(ctx->globals, type); - ctx->builtin_types.vector[bt][x - 1] = type; - - if (x == 1) - { -- sprintf(name, "%s", names[bt]); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_SCALAR, bt, x, y); -+ vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s", names[bt]); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_SCALAR, bt, x, y); - hlsl_scope_add_type(ctx->globals, type); - ctx->builtin_types.scalar[bt] = type; - } -@@ -3637,22 +3643,25 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - { - for (x = 1; x <= 4; ++x) - { -- sprintf(name, "%s%ux%u", variants[v], y, x); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_MATRIX, bt, x, y); -+ 
vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s%ux%u", variants[v], y, x); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_MATRIX, bt, x, y); - type->is_minimum_precision = 1; - hlsl_scope_add_type(ctx->globals, type); - - if (y == 1) - { -- sprintf(name, "%s%u", variants[v], x); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_VECTOR, bt, x, y); -+ vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s%u", variants[v], x); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_VECTOR, bt, x, y); - type->is_minimum_precision = 1; - hlsl_scope_add_type(ctx->globals, type); - - if (x == 1) - { -- sprintf(name, "%s", variants[v]); -- type = hlsl_new_type(ctx, name, HLSL_CLASS_SCALAR, bt, x, y); -+ vkd3d_string_buffer_clear(name); -+ vkd3d_string_buffer_printf(name, "%s", variants[v]); -+ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_SCALAR, bt, x, y); - type->is_minimum_precision = 1; - hlsl_scope_add_type(ctx->globals, type); - } -@@ -3690,6 +3699,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - type->e.version = technique_types[i].version; - hlsl_scope_add_type(ctx->globals, type); - } -+ -+ hlsl_release_string_buffer(ctx, name); - } - - static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, -@@ -3965,7 +3976,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d - } - if (ret >= 0) - { -- ret = vkd3d_shader_parser_compile(parser, &info, out, message_context); -+ ret = vsir_program_compile(&parser->program, parser->config_flags, &info, out, message_context); - vkd3d_shader_parser_destroy(parser); - } - vkd3d_shader_free_shader_code(&info.source); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 1e5f0805152..c3a4c6bd291 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -1422,6 +1422,7 @@ struct hlsl_reg hlsl_reg_from_deref(struct 
hlsl_ctx *ctx, const struct hlsl_dere - - bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); - bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); -+bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); - bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); - bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), - struct hlsl_block *block, void *context); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index a6d6b336b40..94acb70fff9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -5427,6 +5427,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - do - { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); - progress |= hlsl_copy_propagation_execute(ctx, body); - progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 4cea98e9286..51f2f9cc050 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -1396,6 +1396,136 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return success; - } - -+static bool constant_is_zero(struct hlsl_ir_constant *const_arg) -+{ -+ struct hlsl_type *data_type = const_arg->node.data_type; -+ unsigned int k; -+ -+ for (k = 0; k < data_type->dimx; ++k) -+ { -+ switch (data_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if 
(const_arg->value.u[k].f != 0.0f) -+ return false; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (const_arg->value.u[k].d != 0.0) -+ return false; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_BOOL: -+ if (const_arg->value.u[k].u != 0) -+ return false; -+ break; -+ -+ default: -+ return false; -+ } -+ } -+ return true; -+} -+ -+static bool constant_is_one(struct hlsl_ir_constant *const_arg) -+{ -+ struct hlsl_type *data_type = const_arg->node.data_type; -+ unsigned int k; -+ -+ for (k = 0; k < data_type->dimx; ++k) -+ { -+ switch (data_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (const_arg->value.u[k].f != 1.0f) -+ return false; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (const_arg->value.u[k].d != 1.0) -+ return false; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_BOOL: -+ if (const_arg->value.u[k].u != 1) -+ return false; -+ break; -+ -+ default: -+ return false; -+ } -+ } -+ return true; -+} -+ -+bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_constant *const_arg = NULL; -+ struct hlsl_ir_node *mut_arg = NULL; -+ struct hlsl_ir_node *res_node; -+ struct hlsl_ir_expr *expr; -+ unsigned int i; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ -+ if (instr->data_type->class > HLSL_CLASS_VECTOR) -+ return false; -+ -+ /* Verify that the expression has two operands. 
*/ -+ for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) -+ { -+ if (!!expr->operands[i].node != (i < 2)) -+ return false; -+ } -+ -+ if (expr->operands[0].node->type == HLSL_IR_CONSTANT) -+ { -+ const_arg = hlsl_ir_constant(expr->operands[0].node); -+ mut_arg = expr->operands[1].node; -+ } -+ else if (expr->operands[1].node->type == HLSL_IR_CONSTANT) -+ { -+ mut_arg = expr->operands[0].node; -+ const_arg = hlsl_ir_constant(expr->operands[1].node); -+ } -+ else -+ { -+ return false; -+ } -+ -+ res_node = NULL; -+ switch (expr->op) -+ { -+ case HLSL_OP2_ADD: -+ if (constant_is_zero(const_arg)) -+ res_node = mut_arg; -+ break; -+ -+ case HLSL_OP2_MUL: -+ if (constant_is_one(const_arg)) -+ res_node = mut_arg; -+ break; -+ -+ default: -+ break; -+ } -+ -+ if (res_node) -+ { -+ hlsl_replace_node(&expr->node, res_node); -+ return true; -+ } -+ return false; -+} -+ - bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - struct hlsl_constant_value value; -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index c4e712b8471..dc9e8c06a5e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -361,6 +361,7 @@ struct vkd3d_spirv_builder - uint32_t type_sampler_id; - uint32_t type_bool_id; - uint32_t type_void_id; -+ uint32_t scope_subgroup_id; - - struct vkd3d_spirv_stream debug_stream; /* debug instructions */ - struct vkd3d_spirv_stream annotation_stream; /* decoration instructions */ -@@ -1741,6 +1742,16 @@ static void vkd3d_spirv_build_op_memory_barrier(struct vkd3d_spirv_builder *buil - SpvOpMemoryBarrier, memory_id, memory_semantics_id); - } - -+static uint32_t vkd3d_spirv_build_op_scope_subgroup(struct vkd3d_spirv_builder *builder) -+{ -+ return vkd3d_spirv_get_op_constant(builder, vkd3d_spirv_get_op_type_int(builder, 32, 0), SpvScopeSubgroup); -+} -+ -+static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *builder) 
-+{ -+ return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); -+} -+ - static uint32_t vkd3d_spirv_build_op_glsl_std450_tr1(struct vkd3d_spirv_builder *builder, - enum GLSLstd450 op, uint32_t result_type, uint32_t operand) - { -@@ -2453,8 +2464,7 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) - static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *program, - const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, -- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location, -- uint64_t config_flags) -+ struct vkd3d_shader_message_context *message_context, uint64_t config_flags) - { - const struct shader_signature *patch_constant_signature = &program->patch_constant_signature; - const struct shader_signature *output_signature = &program->output_signature; -@@ -2470,7 +2480,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p - - memset(compiler, 0, sizeof(*compiler)); - compiler->message_context = message_context; -- compiler->location = *location; -+ compiler->location.source_name = compile_info->source_name; - compiler->config_flags = config_flags; - - if ((target_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO))) -@@ -2632,6 +2642,11 @@ static bool spirv_compiler_is_opengl_target(const struct spirv_compiler *compile - return spirv_compiler_get_target_environment(compiler) == VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5; - } - -+static bool spirv_compiler_is_spirv_min_1_3_target(const struct spirv_compiler *compiler) -+{ -+ return spirv_compiler_get_target_environment(compiler) == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1; -+} -+ - static bool spirv_compiler_is_target_extension_supported(const struct spirv_compiler *compiler, - enum vkd3d_shader_spirv_extension extension) - { -@@ -3150,6 +3165,12 @@ static bool 
spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s - case VKD3DSPR_OUTSTENCILREF: - snprintf(buffer, buffer_size, "oStencilRef"); - break; -+ case VKD3DSPR_WAVELANECOUNT: -+ snprintf(buffer, buffer_size, "vWaveLaneCount"); -+ break; -+ case VKD3DSPR_WAVELANEINDEX: -+ snprintf(buffer, buffer_size, "vWaveLaneIndex"); -+ break; - default: - FIXME("Unhandled register %#x.\n", reg->type); - snprintf(buffer, buffer_size, "unrecognized_%#x", reg->type); -@@ -4535,6 +4556,10 @@ static void spirv_compiler_decorate_builtin(struct spirv_compiler *compiler, - case SpvBuiltInCullDistance: - vkd3d_spirv_enable_capability(builder, SpvCapabilityCullDistance); - break; -+ case SpvBuiltInSubgroupSize: -+ case SpvBuiltInSubgroupLocalInvocationId: -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniform); -+ break; - default: - break; - } -@@ -4724,6 +4749,9 @@ vkd3d_register_builtins[] = - {VKD3DSPR_DEPTHOUTLE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, - - {VKD3DSPR_OUTSTENCILREF, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, -+ -+ {VKD3DSPR_WAVELANECOUNT, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, -+ {VKD3DSPR_WAVELANEINDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, - }; - - static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *compiler, -@@ -5772,6 +5800,23 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler - flags &= ~VKD3DSGF_ENABLE_INT64; - } - -+ if (flags & VKD3DSGF_ENABLE_WAVE_INTRINSICS) -+ { -+ if (!(compiler->features & VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS)) -+ { -+ WARN("Unsupported wave ops.\n"); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, -+ "The target environment does not support wave ops."); -+ } -+ else if (!spirv_compiler_is_spirv_min_1_3_target(compiler)) -+ { -+ WARN("Wave ops enabled but environment does not support SPIR-V 1.3 or greater.\n"); -+ 
spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, -+ "The target environment uses wave ops but does not support SPIR-V 1.3 or greater."); -+ } -+ flags &= ~VKD3DSGF_ENABLE_WAVE_INTRINSICS; -+ } -+ - if (flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS)) - FIXME("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); - else -@@ -9713,6 +9758,41 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_end_primitive(builder); - } - -+static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) -+{ -+ switch (handler_idx) -+ { -+ case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: -+ return SpvOpGroupNonUniformAllEqual; -+ case VKD3DSIH_WAVE_ALL_TRUE: -+ return SpvOpGroupNonUniformAll; -+ case VKD3DSIH_WAVE_ANY_TRUE: -+ return SpvOpGroupNonUniformAny; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, val_id; -+ SpvOp op; -+ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); -+ -+ op = map_wave_bool_op(instruction->handler_idx); -+ type_id = vkd3d_spirv_get_op_type_bool(builder); -+ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -+ val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, -+ type_id, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ - /* This function is called after declarations are processed. 
*/ - static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) - { -@@ -9732,6 +9812,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - { - int ret = VKD3D_OK; - -+ compiler->location = instruction->location; -+ - switch (instruction->handler_idx) - { - case VKD3DSIH_DCL_GLOBAL_FLAGS: -@@ -10055,6 +10137,11 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_CUT_STREAM: - spirv_compiler_emit_cut_stream(compiler, instruction); - break; -+ case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: -+ case VKD3DSIH_WAVE_ALL_TRUE: -+ case VKD3DSIH_WAVE_ANY_TRUE: -+ spirv_compiler_emit_wave_bool_op(compiler, instruction); -+ break; - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: - case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: -@@ -10151,15 +10238,13 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c - } - } - --static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, -- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, -- struct vkd3d_shader_code *spirv) -+static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct vsir_program *program, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *spirv) - { - const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; - const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - struct vkd3d_shader_instruction_array instructions; -- struct vsir_program *program = &parser->program; - enum vkd3d_shader_spirv_environment environment; - enum vkd3d_result result = VKD3D_OK; - unsigned int i; -@@ -10175,9 +10260,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - - spirv_compiler_emit_descriptor_declarations(compiler); - -- compiler->location.column = 0; -- compiler->location.line = 1; 
-- - if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) - return VKD3D_ERROR_OUT_OF_MEMORY; - -@@ -10202,7 +10284,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - - for (i = 0; i < instructions.count && result >= 0; ++i) - { -- compiler->location.line = i + 1; - result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); - } - -@@ -10249,7 +10330,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - if (!vkd3d_spirv_compile_module(builder, spirv, spirv_compiler_get_entry_point_name(compiler), environment)) - return VKD3D_ERROR; - -- if (TRACE_ON() || parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) -+ if (TRACE_ON() || compiler->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) - { - struct vkd3d_string_buffer buffer; - -@@ -10287,7 +10368,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - return VKD3D_OK; - } - --int spirv_compile(struct vkd3d_shader_parser *parser, -+int spirv_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -@@ -10295,14 +10376,14 @@ int spirv_compile(struct vkd3d_shader_parser *parser, - struct spirv_compiler *spirv_compiler; - int ret; - -- if (!(spirv_compiler = spirv_compiler_create(&parser->program, compile_info, -- scan_descriptor_info, message_context, &parser->location, parser->config_flags))) -+ if (!(spirv_compiler = spirv_compiler_create(program, compile_info, -+ scan_descriptor_info, message_context, config_flags))) - { - ERR("Failed to create SPIR-V compiler.\n"); - return VKD3D_ERROR; - } - -- ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); -+ ret = spirv_compiler_generate_spirv(spirv_compiler, program, 
compile_info, out); - - spirv_compiler_destroy(spirv_compiler); - return ret; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index cb37efb53f7..29b2c1482a9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -1395,9 +1395,9 @@ static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_des - vkd3d_free(scan_descriptor_info->descriptors); - } - --static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, -+static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, -- struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) -+ struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) - { - struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; - struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; -@@ -1428,27 +1428,27 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - descriptor_info1 = &local_descriptor_info1; - } - -- vkd3d_shader_scan_context_init(&context, &parser->program.shader_version, compile_info, -+ vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, - descriptor_info1, combined_sampler_info, message_context); - - if (TRACE_ON()) -- vkd3d_shader_trace(&parser->program); -+ vkd3d_shader_trace(program); - -- for (i = 0; i < parser->program.instructions.count; ++i) -+ for (i = 0; i < program->instructions.count; ++i) - { -- instruction = &parser->program.instructions.elements[i]; -+ instruction = &program->instructions.elements[i]; - if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) - break; - } - -- for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) -+ for (i = 0; i < 
ARRAY_SIZE(program->flat_constant_count); ++i) - { -- unsigned int size = parser->shader_desc.flat_constant_count[i].external; - struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; - struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; -+ unsigned int size = program->flat_constant_count[i]; - struct vkd3d_shader_descriptor_info1 *d; - -- if (parser->shader_desc.flat_constant_count[i].external) -+ if (size) - { - if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ®, - &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) -@@ -1458,11 +1458,11 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - - if (!ret && signature_info) - { -- if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->program.input_signature) -+ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &program->input_signature) - || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, -- &parser->program.output_signature) -+ &program->output_signature) - || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, -- &parser->program.patch_constant_signature)) -+ &program->patch_constant_signature)) - { - ret = VKD3D_ERROR_OUT_OF_MEMORY; - } -@@ -1544,7 +1544,7 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - } - else - { -- ret = scan_with_parser(compile_info, &message_context, NULL, parser); -+ ret = vsir_program_scan(&parser->program, compile_info, &message_context, NULL); - vkd3d_shader_parser_destroy(parser); - } - } -@@ -1556,12 +1556,11 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - return ret; - } - --int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context 
*message_context) -+int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -+ struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; -- struct vsir_program *program = &parser->program; - struct vkd3d_shader_compile_info scan_info; - int ret; - -@@ -1574,17 +1573,18 @@ int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - break; - - case VKD3D_SHADER_TARGET_GLSL: -- if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) -+ if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) - return ret; -- ret = glsl_compile(program, parser->config_flags, compile_info, out, message_context); -+ ret = glsl_compile(program, config_flags, compile_info, out, message_context); - vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - break; - - case VKD3D_SHADER_TARGET_SPIRV_BINARY: - case VKD3D_SHADER_TARGET_SPIRV_TEXT: -- if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) -+ if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) - return ret; -- ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); -+ ret = spirv_compile(program, config_flags, &scan_descriptor_info, -+ compile_info, out, message_context); - vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - break; - -@@ -1665,7 +1665,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - } - else - { -- ret = vkd3d_shader_parser_compile(parser, compile_info, out, &message_context); -+ ret = vsir_program_compile(&parser->program, parser->config_flags, compile_info, out, &message_context); - vkd3d_shader_parser_destroy(parser); - } - } -diff --git 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 4434e6e98f2..07b5818cba9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -527,6 +527,9 @@ enum vkd3d_shader_opcode - VKD3DSIH_UTOD, - VKD3DSIH_UTOF, - VKD3DSIH_UTOU, -+ VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL, -+ VKD3DSIH_WAVE_ALL_TRUE, -+ VKD3DSIH_WAVE_ANY_TRUE, - VKD3DSIH_XOR, - - VKD3DSIH_INVALID, -@@ -590,6 +593,8 @@ enum vkd3d_shader_register_type - VKD3DSPR_OUTSTENCILREF, - VKD3DSPR_UNDEF, - VKD3DSPR_SSA, -+ VKD3DSPR_WAVELANECOUNT, -+ VKD3DSPR_WAVELANEINDEX, - - VKD3DSPR_COUNT, - -@@ -1061,14 +1066,6 @@ struct dxbc_shader_desc - struct shader_signature patch_constant_signature; - }; - --struct vkd3d_shader_desc --{ -- struct -- { -- uint32_t used, external; -- } flat_constant_count[3]; --}; -- - struct vkd3d_shader_register_semantic - { - struct vkd3d_shader_dst_param reg; -@@ -1249,6 +1246,12 @@ static inline bool register_is_scalar_constant_zero(const struct vkd3d_shader_re - && (data_type_is_64_bit(reg->data_type) ? 
!reg->u.immconst_u64[0] : !reg->u.immconst_u32[0]); - } - -+static inline bool register_is_numeric_array(const struct vkd3d_shader_register *reg) -+{ -+ return (reg->type == VKD3DSPR_IMMCONSTBUFFER || reg->type == VKD3DSPR_IDXTEMP -+ || reg->type == VKD3DSPR_GROUPSHAREDMEM); -+} -+ - static inline bool vsir_register_is_label(const struct vkd3d_shader_register *reg) - { - return reg->type == VKD3DSPR_LABEL; -@@ -1332,6 +1335,7 @@ struct vsir_program - struct shader_signature patch_constant_signature; - - unsigned int input_control_point_count, output_control_point_count; -+ unsigned int flat_constant_count[3]; - unsigned int block_count; - unsigned int temp_count; - unsigned int ssa_count; -@@ -1341,8 +1345,11 @@ struct vsir_program - size_t block_name_count; - }; - --bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); - void vsir_program_cleanup(struct vsir_program *program); -+int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -+ struct vkd3d_shader_message_context *message_context); -+bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); - enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); - enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, -@@ -1366,7 +1373,6 @@ struct vkd3d_shader_parser - struct vkd3d_shader_location location; - bool failed; - -- struct vkd3d_shader_desc shader_desc; - const struct vkd3d_shader_parser_ops *ops; - struct vsir_program program; - -@@ -1378,9 +1384,6 @@ struct vkd3d_shader_parser_ops - void (*parser_destroy)(struct vkd3d_shader_parser *parser); - }; - --int vkd3d_shader_parser_compile(struct 
vkd3d_shader_parser *parser, -- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, -- struct vkd3d_shader_message_context *message_context); - void vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); - bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, -@@ -1552,7 +1555,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, - - #define SPIRV_MAX_SRC_COUNT 6 - --int spirv_compile(struct vkd3d_shader_parser *parser, -+int spirv_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 4a69ff530da..95366d3441b 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -2052,20 +2052,15 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, - * state when GPU finishes execution of a command list. 
*/ - if (is_swapchain_image) - { -- if (resource->present_state == D3D12_RESOURCE_STATE_PRESENT) -- { -- *access_mask = VK_ACCESS_MEMORY_READ_BIT; -- *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; -- if (image_layout) -- *image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; -- return true; -- } -- else if (resource->present_state != D3D12_RESOURCE_STATE_COMMON) -- { -- vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, -+ if (resource->present_state != D3D12_RESOURCE_STATE_PRESENT) -+ return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, - resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); -- return true; -- } -+ -+ *access_mask = VK_ACCESS_MEMORY_READ_BIT; -+ *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; -+ if (image_layout) -+ *image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; -+ return true; - } - - *access_mask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT; -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index cb2b6ad0364..3f3332dd3e3 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -788,6 +788,11 @@ VkInstance vkd3d_instance_get_vk_instance(struct vkd3d_instance *instance) - return instance->vk_instance; - } - -+static bool d3d12_device_environment_is_vulkan_min_1_1(struct d3d12_device *device) -+{ -+ return device->environment == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1; -+} -+ - struct vkd3d_physical_device_info - { - /* properties */ -@@ -796,6 +801,7 @@ struct vkd3d_physical_device_info - VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; - VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties; - VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties; -+ VkPhysicalDeviceSubgroupProperties subgroup_properties; - - VkPhysicalDeviceProperties2KHR properties2; - -@@ -838,6 +844,7 @@ static void 
vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - VkPhysicalDevice4444FormatsFeaturesEXT *formats4444_features; - VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; - struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; -+ VkPhysicalDeviceSubgroupProperties *subgroup_properties; - - memset(info, 0, sizeof(*info)); - conditional_rendering_features = &info->conditional_rendering_features; -@@ -857,6 +864,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - formats4444_features = &info->formats4444_features; - xfb_features = &info->xfb_features; - xfb_properties = &info->xfb_properties; -+ subgroup_properties = &info->subgroup_properties; - - info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - -@@ -902,6 +910,9 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - vk_prepend_struct(&info->properties2, xfb_properties); - vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; - vk_prepend_struct(&info->properties2, vertex_divisor_properties); -+ subgroup_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; -+ if (d3d12_device_environment_is_vulkan_min_1_1(device)) -+ vk_prepend_struct(&info->properties2, subgroup_properties); - - if (vulkan_info->KHR_get_physical_device_properties2) - VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); -@@ -1509,6 +1520,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - struct vkd3d_physical_device_info *physical_device_info, - uint32_t *device_extension_count, bool **user_extension_supported) - { -+ const VkPhysicalDeviceSubgroupProperties *subgroup_properties = &physical_device_info->subgroup_properties; - const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; - VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock; - const struct 
vkd3d_optional_device_extensions_info *optional_extensions; -@@ -1520,6 +1532,16 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - uint32_t count; - VkResult vr; - -+ /* SHUFFLE is required to implement WaveReadLaneAt with dynamically uniform index before SPIR-V 1.5 / Vulkan 1.2. */ -+ static const VkSubgroupFeatureFlags required_subgroup_features = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT -+ | VK_SUBGROUP_FEATURE_BASIC_BIT -+ | VK_SUBGROUP_FEATURE_BALLOT_BIT -+ | VK_SUBGROUP_FEATURE_SHUFFLE_BIT -+ | VK_SUBGROUP_FEATURE_QUAD_BIT -+ | VK_SUBGROUP_FEATURE_VOTE_BIT; -+ -+ static const VkSubgroupFeatureFlags required_stages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; -+ - *device_extension_count = 0; - - vkd3d_trace_physical_device(physical_device, physical_device_info, vk_procs); -@@ -1583,10 +1605,12 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - device->feature_options.ResourceHeapTier = D3D12_RESOURCE_HEAP_TIER_2; - - /* Shader Model 6 support. */ -- device->feature_options1.WaveOps = FALSE; -- device->feature_options1.WaveLaneCountMin = 0; -- device->feature_options1.WaveLaneCountMax = 0; -- device->feature_options1.TotalLaneCount = 0; -+ device->feature_options1.WaveOps = subgroup_properties->subgroupSize >= 4 -+ && (subgroup_properties->supportedOperations & required_subgroup_features) == required_subgroup_features -+ && (subgroup_properties->supportedStages & required_stages) == required_stages; -+ device->feature_options1.WaveLaneCountMin = subgroup_properties->subgroupSize; -+ device->feature_options1.WaveLaneCountMax = subgroup_properties->subgroupSize; -+ device->feature_options1.TotalLaneCount = 32 * subgroup_properties->subgroupSize; /* approx. 
*/ - device->feature_options1.ExpandedComputeResourceStates = TRUE; - device->feature_options1.Int64ShaderOps = features->shaderInt64; - -@@ -3434,7 +3458,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 - - TRACE("Request shader model %#x.\n", data->HighestShaderModel); - -+#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -+ data->HighestShaderModel = D3D_SHADER_MODEL_6_0; -+#else - data->HighestShaderModel = D3D_SHADER_MODEL_5_1; -+#endif - - TRACE("Shader model %#x.\n", data->HighestShaderModel); - return S_OK; -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 6ba29c18004..199d8043ffe 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -2159,6 +2159,8 @@ static unsigned int feature_flags_compile_option(const struct d3d12_device *devi - flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_INT64; - if (device->feature_options.DoublePrecisionFloatShaderOps) - flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLOAT64; -+ if (device->feature_options1.WaveOps) -+ flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS; - - return flags; - } --- -2.43.0 - diff --git a/patches/vkd3d-latest/0006-Updated-vkd3d-to-4b3a948edcb5e83074b63aad25ecf450dca.patch b/patches/vkd3d-latest/0006-Updated-vkd3d-to-4b3a948edcb5e83074b63aad25ecf450dca.patch deleted file mode 100644 index 3ea4add8..00000000 --- a/patches/vkd3d-latest/0006-Updated-vkd3d-to-4b3a948edcb5e83074b63aad25ecf450dca.patch +++ /dev/null @@ -1,3631 +0,0 @@ -From 669dcd4183544055c8b512d6a60df7536e82b453 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Mon, 13 May 2024 09:29:08 +1000 -Subject: [PATCH] Updated vkd3d to 4b3a948edcb5e83074b63aad25ecf450dcae4130. 
- ---- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 23 + - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 31 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 321 +++++++++++ - libs/vkd3d/libs/vkd3d-shader/fx.c | 506 ++++++++++++++---- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 168 +++--- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 19 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 66 +-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 84 ++- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 170 +++--- - libs/vkd3d/libs/vkd3d-shader/ir.c | 26 + - libs/vkd3d/libs/vkd3d-shader/spirv.c | 94 ++++ - libs/vkd3d/libs/vkd3d-shader/tpf.c | 79 +-- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 16 + - libs/vkd3d/libs/vkd3d/device.c | 177 +++--- - libs/vkd3d/libs/vkd3d/utils.c | 24 + - 15 files changed, 1335 insertions(+), 469 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index b2f329cd199..f2ad39f2f07 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -328,8 +328,20 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_UTOF ] = "utof", - [VKD3DSIH_UTOU ] = "utou", - [VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL ] = "wave_active_all_equal", -+ [VKD3DSIH_WAVE_ACTIVE_BALLOT ] = "wave_active_ballot", -+ [VKD3DSIH_WAVE_ACTIVE_BIT_AND ] = "wave_active_bit_and", -+ [VKD3DSIH_WAVE_ACTIVE_BIT_OR ] = "wave_active_bit_or", -+ [VKD3DSIH_WAVE_ACTIVE_BIT_XOR ] = "wave_active_bit_xor", - [VKD3DSIH_WAVE_ALL_TRUE ] = "wave_all_true", - [VKD3DSIH_WAVE_ANY_TRUE ] = "wave_any_true", -+ [VKD3DSIH_WAVE_OP_ADD ] = "wave_op_add", -+ [VKD3DSIH_WAVE_OP_IMAX ] = "wave_op_imax", -+ [VKD3DSIH_WAVE_OP_IMIN ] = "wave_op_imin", -+ [VKD3DSIH_WAVE_OP_MAX ] = "wave_op_max", -+ [VKD3DSIH_WAVE_OP_MIN ] = "wave_op_min", -+ [VKD3DSIH_WAVE_OP_MUL ] = "wave_op_mul", -+ [VKD3DSIH_WAVE_OP_UMAX ] = "wave_op_umax", -+ [VKD3DSIH_WAVE_OP_UMIN ] = "wave_op_umin", - [VKD3DSIH_XOR ] = "xor", - }; - -@@ -1840,6 +1852,17 @@ static void 
shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - vkd3d_string_buffer_printf(buffer, "p"); - break; - -+ case VKD3DSIH_WAVE_OP_ADD: -+ case VKD3DSIH_WAVE_OP_IMAX: -+ case VKD3DSIH_WAVE_OP_IMIN: -+ case VKD3DSIH_WAVE_OP_MAX: -+ case VKD3DSIH_WAVE_OP_MIN: -+ case VKD3DSIH_WAVE_OP_MUL: -+ case VKD3DSIH_WAVE_OP_UMAX: -+ case VKD3DSIH_WAVE_OP_UMIN: -+ vkd3d_string_buffer_printf(&compiler->buffer, (ins->flags & VKD3DSI_WAVE_PREFIX) ? "_prefix" : "_active"); -+ break; -+ - case VKD3DSIH_ISHL: - case VKD3DSIH_ISHR: - case VKD3DSIH_USHR: -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index aa2358440e5..24a95224349 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1514,10 +1514,11 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - return D3DXPC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3DXPC_VECTOR; -- case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_VERTEX_SHADER: - return D3DXPC_OBJECT; - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: -@@ -1539,7 +1540,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - return D3DXPT_BOOL; -@@ -1600,18 +1601,6 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - } - break; - -- case HLSL_CLASS_OBJECT: -- switch (type->base_type) -- { -- case HLSL_TYPE_PIXELSHADER: -- return D3DXPT_PIXELSHADER; -- case HLSL_TYPE_VERTEXSHADER: -- return D3DXPT_VERTEXSHADER; -- default: -- vkd3d_unreachable(); -- } -- vkd3d_unreachable(); -- - case HLSL_CLASS_ARRAY: - return hlsl_sm1_base_type(type->e.array.type); - -@@ -1621,6 +1610,12 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct 
hlsl_type *type) - case HLSL_CLASS_STRING: - return D3DXPT_STRING; - -+ case HLSL_CLASS_PIXEL_SHADER: -+ return D3DXPT_PIXELSHADER; -+ -+ case HLSL_CLASS_VERTEX_SHADER: -+ return D3DXPT_VERTEXSHADER; -+ - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: -@@ -2020,11 +2015,11 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - /* Narrowing casts were already lowered. */ - assert(src_type->dimx == dst_type->dimx); - -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -@@ -2046,7 +2041,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- switch(src_type->base_type) -+ switch(src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -@@ -2308,7 +2303,7 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - return; - } - -- if (instr->data_type->base_type != HLSL_TYPE_FLOAT) -+ if (instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) - { - /* These need to be lowered. 
*/ - hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index e636ad917db..29f736364dc 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -37,6 +37,10 @@ static const size_t MAX_IR_INSTRUCTIONS_PER_DXIL_INSTRUCTION = 11; - - static const unsigned int dx_max_thread_group_size[3] = {1024, 1024, 64}; - -+static const unsigned int MAX_GS_INSTANCE_COUNT = 32; /* kMaxGSInstanceCount */ -+static const unsigned int MAX_GS_OUTPUT_TOTAL_SCALARS = 1024; /* kMaxGSOutputTotalScalars */ -+static const unsigned int MAX_GS_OUTPUT_STREAMS = 4; -+ - #define VKD3D_SHADER_SWIZZLE_64_MASK \ - (VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(0) \ - | VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(1)) -@@ -283,6 +287,18 @@ enum dxil_element_additional_tag - ADDITIONAL_TAG_USED_MASK = 3, - }; - -+enum dxil_input_primitive -+{ -+ INPUT_PRIMITIVE_UNDEFINED = 0, -+ INPUT_PRIMITIVE_POINT = 1, -+ INPUT_PRIMITIVE_LINE = 2, -+ INPUT_PRIMITIVE_TRIANGLE = 3, -+ INPUT_PRIMITIVE_LINEWITHADJACENCY = 6, -+ INPUT_PRIMITIVE_TRIANGLEWITHADJACENY = 7, -+ INPUT_PRIMITIVE_PATCH1 = 8, -+ INPUT_PRIMITIVE_PATCH32 = 39, -+}; -+ - enum dxil_shader_properties_tag - { - SHADER_PROPERTIES_FLAGS = 0, -@@ -419,6 +435,9 @@ enum dx_intrinsic_opcode - DX_GROUP_ID = 94, - DX_THREAD_ID_IN_GROUP = 95, - DX_FLATTENED_THREAD_ID_IN_GROUP = 96, -+ DX_EMIT_STREAM = 97, -+ DX_CUT_STREAM = 98, -+ DX_EMIT_THEN_CUT_STREAM = 99, - DX_MAKE_DOUBLE = 101, - DX_SPLIT_DOUBLE = 102, - DX_LOAD_OUTPUT_CONTROL_POINT = 103, -@@ -432,6 +451,10 @@ enum dx_intrinsic_opcode - DX_WAVE_ANY_TRUE = 113, - DX_WAVE_ALL_TRUE = 114, - DX_WAVE_ACTIVE_ALL_EQUAL = 115, -+ DX_WAVE_ACTIVE_BALLOT = 116, -+ DX_WAVE_ACTIVE_OP = 119, -+ DX_WAVE_ACTIVE_BIT = 120, -+ DX_WAVE_PREFIX_OP = 121, - DX_LEGACY_F32TOF16 = 130, - DX_LEGACY_F16TOF32 = 131, - DX_RAW_BUFFER_LOAD = 139, -@@ -533,6 +556,21 @@ enum 
dxil_sync_flags - SYNC_GROUP_SHARED_MEMORY = 0x8, - }; - -+enum dxil_wave_bit_op_kind -+{ -+ WAVE_BIT_OP_AND = 0, -+ WAVE_BIT_OP_OR = 1, -+ WAVE_BIT_OP_XOR = 2, -+}; -+ -+enum dxil_wave_op_kind -+{ -+ WAVE_OP_ADD = 0, -+ WAVE_OP_MUL = 1, -+ WAVE_OP_MIN = 2, -+ WAVE_OP_MAX = 3, -+}; -+ - struct sm6_pointer_info - { - const struct sm6_type *type; -@@ -4896,6 +4934,38 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int - ins->handler_idx = VKD3DSIH_NOP; - } - -+static void sm6_parser_emit_dx_stream(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ unsigned int i; -+ -+ vsir_instruction_init(ins, &sm6->p.location, (op == DX_CUT_STREAM) ? VKD3DSIH_CUT_STREAM : VKD3DSIH_EMIT_STREAM); -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ -+ i = sm6_value_get_constant_uint(operands[0]); -+ if (i >= MAX_GS_OUTPUT_STREAMS) -+ { -+ WARN("Invalid stream index %u.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Output stream index %u is invalid.", i); -+ } -+ -+ /* VKD3D_DATA_UNUSED would be more reasonable, but TPF uses data type 0 here. 
*/ -+ register_init_with_id(&src_param->reg, VKD3DSPR_STREAM, 0, i); -+ src_param_init(src_param); -+ -+ if (op == DX_EMIT_THEN_CUT_STREAM) -+ { -+ ++state->ins; -+ ++state->code_block->instruction_count; -+ sm6_parser_emit_dx_stream(sm6, DX_CUT_STREAM, operands, state); -+ } -+} -+ - static void sm6_parser_emit_dx_discard(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -5910,6 +5980,111 @@ static void sm6_parser_emit_dx_texture_store(struct sm6_parser *sm6, enum dx_int - dst_param_init_with_mask(dst_param, write_mask); - } - -+static void sm6_parser_emit_dx_wave_active_ballot(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_WAVE_ACTIVE_BALLOT); -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ -+ instruction_dst_param_init_ssa_vector(ins, VKD3D_VEC4_SIZE, sm6); -+} -+ -+static enum vkd3d_shader_opcode sm6_dx_map_wave_bit_op(enum dxil_wave_bit_op_kind op, -+ struct sm6_parser *sm6) -+{ -+ switch (op) -+ { -+ case WAVE_BIT_OP_AND: -+ return VKD3DSIH_WAVE_ACTIVE_BIT_AND; -+ case WAVE_BIT_OP_OR: -+ return VKD3DSIH_WAVE_ACTIVE_BIT_OR; -+ case WAVE_BIT_OP_XOR: -+ return VKD3DSIH_WAVE_ACTIVE_BIT_XOR; -+ default: -+ FIXME("Unhandled wave bit op %u.\n", op); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, -+ "Wave bit operation %u is unhandled.\n", op); -+ return VKD3DSIH_INVALID; -+ } -+} -+ -+static void sm6_parser_emit_dx_wave_active_bit(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ 
struct vkd3d_shader_src_param *src_param; -+ enum dxil_wave_bit_op_kind wave_op; -+ enum vkd3d_shader_opcode opcode; -+ -+ wave_op = sm6_value_get_constant_uint(operands[1]); -+ -+ if ((opcode = sm6_dx_map_wave_bit_op(wave_op, sm6)) == VKD3DSIH_INVALID) -+ return; -+ vsir_instruction_init(ins, &sm6->p.location, opcode); -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ -+static enum vkd3d_shader_opcode sm6_dx_map_wave_op(enum dxil_wave_op_kind op, bool is_signed, bool is_float, -+ struct sm6_parser *sm6) -+{ -+ switch (op) -+ { -+ case WAVE_OP_ADD: -+ return VKD3DSIH_WAVE_OP_ADD; -+ case WAVE_OP_MUL: -+ return VKD3DSIH_WAVE_OP_MUL; -+ case WAVE_OP_MIN: -+ if (is_float) -+ return VKD3DSIH_WAVE_OP_MIN; -+ return is_signed ? VKD3DSIH_WAVE_OP_IMIN : VKD3DSIH_WAVE_OP_UMIN; -+ case WAVE_OP_MAX: -+ if (is_float) -+ return VKD3DSIH_WAVE_OP_MAX; -+ return is_signed ? 
VKD3DSIH_WAVE_OP_IMAX : VKD3DSIH_WAVE_OP_UMAX; -+ default: -+ FIXME("Unhandled wave op %u.\n", op); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, -+ "Wave operation %u is unhandled.\n", op); -+ return VKD3DSIH_INVALID; -+ } -+} -+ -+static void sm6_parser_emit_dx_wave_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ enum vkd3d_shader_opcode opcode; -+ enum dxil_wave_op_kind wave_op; -+ bool is_signed; -+ -+ wave_op = sm6_value_get_constant_uint(operands[1]); -+ is_signed = !sm6_value_get_constant_uint(operands[2]); -+ opcode = sm6_dx_map_wave_op(wave_op, is_signed, sm6_type_is_floating_point(operands[0]->type), sm6); -+ -+ if (opcode == VKD3DSIH_INVALID) -+ return; -+ -+ vsir_instruction_init(ins, &sm6->p.location, opcode); -+ ins->flags = (op == DX_WAVE_PREFIX_OP) ? 
VKD3DSI_WAVE_PREFIX : 0; -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_wave_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -5954,6 +6129,7 @@ struct sm6_dx_opcode_info - H -> handle - D -> Dimensions - S -> splitdouble -+ V -> 4 x i32 - v -> void - o -> overloaded - R -> matches the return type -@@ -5976,6 +6152,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, - [DX_COVERAGE ] = {"i", "", sm6_parser_emit_dx_coverage}, - [DX_CREATE_HANDLE ] = {"H", "ccib", sm6_parser_emit_dx_create_handle}, -+ [DX_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, - [DX_DERIV_COARSEX ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DERIV_COARSEY ] = {"e", "R", sm6_parser_emit_dx_unary}, - [DX_DERIV_FINEX ] = {"e", "R", sm6_parser_emit_dx_unary}, -@@ -5985,6 +6162,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_DOT2 ] = {"g", "RRRR", sm6_parser_emit_dx_dot}, - [DX_DOT3 ] = {"g", "RRRRRR", sm6_parser_emit_dx_dot}, - [DX_DOT4 ] = {"g", "RRRRRRRR", sm6_parser_emit_dx_dot}, -+ [DX_EMIT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, -+ [DX_EMIT_THEN_CUT_STREAM ] = {"v", "c", sm6_parser_emit_dx_stream}, - [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, - [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, - [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, -@@ -6051,10 +6230,14 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, - [DX_UMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, - [DX_WAVE_ACTIVE_ALL_EQUAL ] = {"1", "n", sm6_parser_emit_dx_unary}, -+ [DX_WAVE_ACTIVE_BALLOT ] = {"V", "1", sm6_parser_emit_dx_wave_active_ballot}, -+ 
[DX_WAVE_ACTIVE_BIT ] = {"m", "Rc", sm6_parser_emit_dx_wave_active_bit}, -+ [DX_WAVE_ACTIVE_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, - [DX_WAVE_ALL_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, - [DX_WAVE_ANY_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, - [DX_WAVE_GET_LANE_COUNT ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, - [DX_WAVE_GET_LANE_INDEX ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, -+ [DX_WAVE_PREFIX_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, - }; - - static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_value *value, char info_type, -@@ -6102,6 +6285,8 @@ static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struc - return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.Dimensions"); - case 'S': - return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.splitdouble"); -+ case 'V': -+ return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.fouri32"); - case 'v': - return !type; - case 'o': -@@ -9336,6 +9521,17 @@ static void sm6_parser_emit_dcl_count(struct sm6_parser *sm6, enum vkd3d_shader_ - ins->declaration.count = count; - } - -+static void sm6_parser_emit_dcl_primitive_topology(struct sm6_parser *sm6, -+ enum vkd3d_shader_opcode handler_idx, enum vkd3d_primitive_type primitive_type, -+ unsigned int patch_vertex_count) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ ins = sm6_parser_add_instruction(sm6, handler_idx); -+ ins->declaration.primitive_type.type = primitive_type; -+ ins->declaration.primitive_type.patch_vertex_count = patch_vertex_count; -+} -+ - static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, - enum vkd3d_tessellator_domain tessellator_domain) - { -@@ -9420,6 +9616,128 @@ static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, - ins->declaration.max_tessellation_factor = max_tessellation_factor; - } - -+static void sm6_parser_gs_properties_init(struct 
sm6_parser *sm6, const struct sm6_metadata_value *m) -+{ -+ enum vkd3d_primitive_type input_primitive = VKD3D_PT_TRIANGLELIST, output_primitive; -+ unsigned int i, input_control_point_count = 1, patch_vertex_count = 0; -+ const struct sm6_metadata_node *node; -+ unsigned int operands[5] = {0}; -+ -+ if (!m || !sm6_metadata_value_is_node(m)) -+ { -+ WARN("Missing or invalid GS properties.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader properties node is missing or invalid."); -+ return; -+ } -+ -+ node = m->u.node; -+ if (node->operand_count < ARRAY_SIZE(operands)) -+ { -+ WARN("Invalid operand count %u.\n", node->operand_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "Geometry shader properties operand count %u is invalid.", node->operand_count); -+ return; -+ } -+ if (node->operand_count > ARRAY_SIZE(operands)) -+ { -+ WARN("Ignoring %zu extra operands.\n", node->operand_count - ARRAY_SIZE(operands)); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %zu extra operands for geometry shader properties.", -+ node->operand_count - ARRAY_SIZE(operands)); -+ } -+ -+ for (i = 0; i < node->operand_count; ++i) -+ { -+ if (!sm6_metadata_get_uint_value(sm6, node->operands[i], &operands[i])) -+ { -+ WARN("GS property at index %u is not a uint value.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader properties operand at index %u is not an integer.", i); -+ } -+ } -+ -+ switch (i = operands[0]) -+ { -+ case INPUT_PRIMITIVE_POINT: -+ input_primitive = VKD3D_PT_POINTLIST; -+ input_control_point_count = 1; -+ break; -+ -+ case INPUT_PRIMITIVE_LINE: -+ input_primitive = VKD3D_PT_LINELIST; -+ input_control_point_count = 2; -+ break; -+ -+ case INPUT_PRIMITIVE_TRIANGLE: -+ input_primitive = VKD3D_PT_TRIANGLELIST; -+ input_control_point_count = 3; -+ break; -+ -+ case 
INPUT_PRIMITIVE_LINEWITHADJACENCY: -+ input_primitive = VKD3D_PT_LINELIST_ADJ; -+ input_control_point_count = 4; -+ break; -+ -+ case INPUT_PRIMITIVE_TRIANGLEWITHADJACENY: -+ input_primitive = VKD3D_PT_TRIANGLELIST_ADJ; -+ input_control_point_count = 6; -+ break; -+ -+ default: -+ if (i >= INPUT_PRIMITIVE_PATCH1 && i <= INPUT_PRIMITIVE_PATCH32) -+ { -+ input_primitive = VKD3D_PT_PATCH; -+ patch_vertex_count = i - INPUT_PRIMITIVE_PATCH1 + 1; -+ break; -+ } -+ -+ WARN("Unhandled input primitive %u.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader input primitive %u is unhandled.", i); -+ break; -+ } -+ -+ sm6_parser_emit_dcl_primitive_topology(sm6, VKD3DSIH_DCL_INPUT_PRIMITIVE, input_primitive, patch_vertex_count); -+ sm6->p.program.input_control_point_count = input_control_point_count; -+ -+ i = operands[1]; -+ /* Max total scalar count sets an upper limit. We would need to scan outputs to be more precise. */ -+ if (i > MAX_GS_OUTPUT_TOTAL_SCALARS) -+ { -+ WARN("GS output vertex count %u invalid.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader output vertex count %u is invalid.", i); -+ } -+ sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_VERTICES_OUT, i); -+ -+ if (operands[2] > 1) -+ { -+ FIXME("Unhandled stream mask %#x.\n", operands[2]); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader stream mask %#x is unhandled.", operands[2]); -+ } -+ -+ output_primitive = operands[3]; -+ if (output_primitive == VKD3D_PT_UNDEFINED || output_primitive >= VKD3D_PT_COUNT) -+ { -+ WARN("Unhandled output primitive %u.\n", output_primitive); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader output primitive %u is unhandled.", output_primitive); -+ output_primitive = VKD3D_PT_TRIANGLELIST; -+ } -+ sm6_parser_emit_dcl_primitive_topology(sm6, 
VKD3DSIH_DCL_OUTPUT_TOPOLOGY, output_primitive, 0); -+ -+ i = operands[4]; -+ if (!i || i > MAX_GS_INSTANCE_COUNT) -+ { -+ WARN("GS instance count %u invalid.\n", i); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -+ "Geometry shader instance count %u is invalid.", i); -+ } -+ sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_GS_INSTANCES, i); -+} -+ - static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_parser *sm6, - const struct sm6_metadata_value *m) - { -@@ -9610,6 +9928,9 @@ static enum vkd3d_result sm6_parser_entry_point_init(struct sm6_parser *sm6) - case SHADER_PROPERTIES_FLAGS: - sm6_parser_emit_global_flags(sm6, node->operands[i + 1]); - break; -+ case SHADER_PROPERTIES_GEOMETRY: -+ sm6_parser_gs_properties_init(sm6, node->operands[i + 1]); -+ break; - case SHADER_PROPERTIES_DOMAIN: - tessellator_domain = sm6_parser_ds_properties_init(sm6, node->operands[i + 1]); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 168378e6b42..6fb2e8a0f0b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -85,8 +85,13 @@ struct fx_write_context - uint32_t numeric_variable_count; - uint32_t object_variable_count; - uint32_t shared_object_count; -- uint32_t shader_variable_count; -+ uint32_t shader_count; - uint32_t parameter_count; -+ uint32_t dsv_count; -+ uint32_t rtv_count; -+ uint32_t texture_count; -+ uint32_t uav_count; -+ uint32_t sampler_state_count; - int status; - - bool child_effect; -@@ -169,7 +174,7 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co - struct fx_write_context *fx) - { - unsigned int version = ctx->profile->major_version; -- struct hlsl_block block; -+ struct hlsl_ir_var *var; - - memset(fx, 0, sizeof(*fx)); - -@@ -197,9 +202,15 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co - fx->child_effect = fx->ops->are_child_effects_supported && 
ctx->child_effect; - fx->include_empty_buffers = version == 4 && ctx->include_empty_buffers; - -- hlsl_block_init(&block); -- hlsl_prepend_global_uniform_copy(fx->ctx, &block); -- hlsl_block_cleanup(&block); -+ LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -+ { -+ list_add_tail(&ctx->extern_vars, &var->extern_entry); -+ var->is_uniform = 1; -+ } -+ } -+ - hlsl_calculate_buffer_offsets(fx->ctx); - } - -@@ -292,6 +303,14 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type) - return type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float) * elements_count; - } - -+static const uint32_t fx_4_numeric_base_type[] = -+{ -+ [HLSL_TYPE_FLOAT] = 1, -+ [HLSL_TYPE_INT ] = 2, -+ [HLSL_TYPE_UINT ] = 3, -+ [HLSL_TYPE_BOOL ] = 4, -+}; -+ - static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, struct fx_write_context *fx) - { - static const unsigned int NUMERIC_BASE_TYPE_SHIFT = 3; -@@ -304,13 +323,6 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - [HLSL_CLASS_VECTOR] = 2, - [HLSL_CLASS_MATRIX] = 3, - }; -- static const uint32_t numeric_base_type[] = -- { -- [HLSL_TYPE_FLOAT] = 1, -- [HLSL_TYPE_INT ] = 2, -- [HLSL_TYPE_UINT ] = 3, -- [HLSL_TYPE_BOOL ] = 4, -- }; - struct hlsl_ctx *ctx = fx->ctx; - uint32_t value = 0; - -@@ -326,16 +338,16 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - return 0; - } - -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: -- value |= (numeric_base_type[type->base_type] << NUMERIC_BASE_TYPE_SHIFT); -+ value |= (fx_4_numeric_base_type[type->e.numeric.type] << NUMERIC_BASE_TYPE_SHIFT); - break; - default: -- hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->base_type); -+ hlsl_fixme(ctx, &ctx->location, "Not implemented for base 
type %u.", type->e.numeric.type); - return 0; - } - -@@ -349,11 +361,6 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - - static const char * get_fx_4_type_name(const struct hlsl_type *type) - { -- static const char * const object_type_names[] = -- { -- [HLSL_TYPE_PIXELSHADER] = "PixelShader", -- [HLSL_TYPE_VERTEXSHADER] = "VertexShader", -- }; - static const char * const texture_type_names[] = - { - [HLSL_SAMPLER_DIM_GENERIC] = "texture", -@@ -380,6 +387,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - - switch (type->class) - { -+ case HLSL_CLASS_SAMPLER: -+ return "SamplerState"; -+ - case HLSL_CLASS_TEXTURE: - return texture_type_names[type->sampler_dim]; - -@@ -392,15 +402,11 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - case HLSL_CLASS_RENDER_TARGET_VIEW: - return "RenderTargetView"; - -- case HLSL_CLASS_OBJECT: -- switch (type->base_type) -- { -- case HLSL_TYPE_PIXELSHADER: -- case HLSL_TYPE_VERTEXSHADER: -- return object_type_names[type->base_type]; -- default: -- return type->name; -- } -+ case HLSL_CLASS_VERTEX_SHADER: -+ return "VertexShader"; -+ -+ case HLSL_CLASS_PIXEL_SHADER: -+ return "PixelShader"; - - default: - return type->name; -@@ -413,7 +419,6 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - uint32_t name_offset, offset, size, stride, numeric_desc; - uint32_t elements_count = 0; - const char *name; -- struct hlsl_ctx *ctx = fx->ctx; - - /* Resolve arrays to element type and number of elements. 
*/ - if (type->class == HLSL_CLASS_ARRAY) -@@ -436,10 +441,12 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - break; - - case HLSL_CLASS_DEPTH_STENCIL_VIEW: -- case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: - put_u32_unaligned(buffer, 2); - break; - -@@ -453,7 +460,6 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - case HLSL_CLASS_TECHNIQUE: - vkd3d_unreachable(); - -- case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_VOID: - FIXME("Writing type class %u is not implemented.\n", type->class); -@@ -509,6 +515,10 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - - put_u32_unaligned(buffer, texture_type[type->sampler_dim]); - } -+ else if (type->class == HLSL_CLASS_SAMPLER) -+ { -+ put_u32_unaligned(buffer, 21); -+ } - else if (type->class == HLSL_CLASS_UAV) - { - static const uint32_t uav_type[] = -@@ -532,24 +542,13 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - { - put_u32_unaligned(buffer, 19); - } -- else if (type->class == HLSL_CLASS_OBJECT) -+ else if (type->class == HLSL_CLASS_PIXEL_SHADER) - { -- static const uint32_t object_type[] = -- { -- [HLSL_TYPE_PIXELSHADER] = 5, -- [HLSL_TYPE_VERTEXSHADER] = 6, -- }; -- -- switch (type->base_type) -- { -- case HLSL_TYPE_PIXELSHADER: -- case HLSL_TYPE_VERTEXSHADER: -- put_u32_unaligned(buffer, object_type[type->base_type]); -- break; -- default: -- hlsl_fixme(ctx, &ctx->location, "Object type %u is not supported.", type->base_type); -- return 0; -- } -+ put_u32_unaligned(buffer, 5); -+ } -+ else if (type->class == HLSL_CLASS_VERTEX_SHADER) -+ { -+ put_u32_unaligned(buffer, 6); - } - else if (hlsl_is_numeric_type(type)) - { -@@ -838,20 +837,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx 
*ctx, const struct hlsl_type - } - break; - -- case HLSL_CLASS_OBJECT: -- switch (type->base_type) -- { -- case HLSL_TYPE_PIXELSHADER: -- case HLSL_TYPE_VERTEXSHADER: -- hlsl_fixme(ctx, loc, "Write fx 2.0 parameter object type %#x.", type->base_type); -- return false; -- -- default: -- return false; -- } -- -+ case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -+ case HLSL_CLASS_VERTEX_SHADER: - hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); - return false; - -@@ -1012,6 +1001,317 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write - hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); - } - -+struct rhs_named_value -+{ -+ const char *name; -+ unsigned int value; -+}; -+ -+static bool get_fx_4_state_enum_value(const struct rhs_named_value *pairs, -+ const char *name, unsigned int *value) -+{ -+ while (pairs->name) -+ { -+ if (!ascii_strcasecmp(pairs->name, name)) -+ { -+ *value = pairs->value; -+ return true; -+ } -+ -+ pairs++; -+ } -+ -+ return false; -+} -+ -+static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ struct hlsl_type *data_type = value->node.data_type; -+ struct hlsl_ctx *ctx = fx->ctx; -+ uint32_t i, type, offset; -+ unsigned int count = hlsl_type_component_count(data_type); -+ -+ offset = put_u32_unaligned(buffer, count); -+ -+ for (i = 0; i < count; ++i) -+ { -+ if (hlsl_is_numeric_type(data_type)) -+ { -+ switch (data_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ type = fx_4_numeric_base_type[data_type->e.numeric.type]; -+ break; -+ default: -+ type = 0; -+ hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value type %u.", data_type->e.numeric.type); -+ } -+ } -+ -+ put_u32_unaligned(buffer, type); -+ 
put_u32_unaligned(buffer, value->value.u[i].u); -+ } -+ -+ return offset; -+} -+ -+static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, -+ struct fx_write_context *fx) -+{ -+ uint32_t value_offset = 0, assignment_type = 0, rhs_offset; -+ uint32_t type_offset; -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ struct hlsl_ctx *ctx = fx->ctx; -+ struct hlsl_ir_node *value = entry->args->node; -+ -+ if (entry->lhs_has_index) -+ hlsl_fixme(ctx, &var->loc, "Unsupported assignment to array element."); -+ -+ put_u32(buffer, entry->name_id); -+ put_u32(buffer, 0); /* TODO: destination index */ -+ type_offset = put_u32(buffer, 0); -+ rhs_offset = put_u32(buffer, 0); -+ -+ switch (value->type) -+ { -+ case HLSL_IR_CONSTANT: -+ { -+ struct hlsl_ir_constant *c = hlsl_ir_constant(value); -+ -+ value_offset = write_fx_4_state_numeric_value(c, fx); -+ assignment_type = 1; -+ break; -+ } -+ default: -+ hlsl_fixme(ctx, &var->loc, "Unsupported assignment type for state %s.", entry->name); -+ } -+ -+ set_u32(buffer, type_offset, assignment_type); -+ set_u32(buffer, rhs_offset, value_offset); -+} -+ -+static bool state_block_contains_state(const char *name, unsigned int start, struct hlsl_state_block *block) -+{ -+ unsigned int i; -+ -+ for (i = start; i < block->count; ++i) -+ { -+ if (!ascii_strcasecmp(block->entries[i]->name, name)) -+ return true; -+ } -+ -+ return false; -+} -+ -+struct replace_state_context -+{ -+ const struct rhs_named_value *values; -+ struct hlsl_ir_var *var; -+}; -+ -+static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct replace_state_context *replace_context = context; -+ struct hlsl_ir_stateblock_constant *state_constant; -+ struct hlsl_ir_node *c; -+ unsigned int value; -+ -+ if (!replace_context->values) -+ return false; -+ if (instr->type != HLSL_IR_STATEBLOCK_CONSTANT) -+ return false; -+ -+ state_constant = 
hlsl_ir_stateblock_constant(instr); -+ if (!get_fx_4_state_enum_value(replace_context->values, state_constant->name, &value)) -+ { -+ hlsl_error(ctx, &replace_context->var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Unrecognized state constant %s.", state_constant->name); -+ return false; -+ } -+ -+ if (!(c = hlsl_new_uint_constant(ctx, value, &replace_context->var->loc))) -+ return false; -+ -+ list_add_before(&state_constant->node.entry, &c->entry); -+ hlsl_replace_node(&state_constant->node, c); -+ -+ return true; -+} -+ -+static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, -+ struct fx_write_context *fx) -+{ -+ static const struct rhs_named_value filter_values[] = -+ { -+ { "MIN_MAG_MIP_POINT", 0x00 }, -+ { "MIN_MAG_POINT_MIP_LINEAR", 0x01 }, -+ { "MIN_POINT_MAG_LINEAR_MIP_POINT", 0x04 }, -+ { "MIN_POINT_MAG_MIP_LINEAR", 0x05 }, -+ { "MIN_LINEAR_MAG_MIP_POINT", 0x10 }, -+ { "MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x11 }, -+ { "MIN_MAG_LINEAR_MIP_POINT", 0x14 }, -+ { "MIN_MAG_MIP_LINEAR", 0x15 }, -+ { "ANISOTROPIC", 0x55 }, -+ { "COMPARISON_MIN_MAG_MIP_POINT", 0x80 }, -+ { "COMPARISON_MIN_MAG_POINT_MIP_LINEAR", 0x81 }, -+ { "COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT", 0x84 }, -+ { "COMPARISON_MIN_POINT_MAG_MIP_LINEAR", 0x85 }, -+ { "COMPARISON_MIN_LINEAR_MAG_MIP_POINT", 0x90 }, -+ { "COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x91 }, -+ { "COMPARISON_MIN_MAG_LINEAR_MIP_POINT", 0x94 }, -+ { "COMPARISON_MIN_MAG_MIP_LINEAR", 0x95 }, -+ { "COMPARISON_ANISOTROPIC", 0xd5 }, -+ { NULL }, -+ }; -+ -+ static const struct rhs_named_value address_values[] = -+ { -+ { "WRAP", 1 }, -+ { "MIRROR", 2 }, -+ { "CLAMP", 3 }, -+ { "BORDER", 4 }, -+ { "MIRROR_ONCE", 5 }, -+ { NULL }, -+ }; -+ -+ static const struct rhs_named_value compare_func_values[] = -+ { -+ { "NEVER", 1 }, -+ { "LESS", 2 }, -+ { "EQUAL", 3 }, -+ { "LESS_EQUAL", 4 }, -+ { "GREATER", 5 }, -+ { "NOT_EQUAL", 6 }, -+ { "GREATER_EQUAL", 7 }, -+ { "ALWAYS", 8 
}, -+ { NULL } -+ }; -+ -+ static const struct state -+ { -+ const char *name; -+ enum hlsl_type_class container; -+ enum hlsl_base_type type; -+ unsigned int dimx; -+ uint32_t id; -+ const struct rhs_named_value *values; -+ } -+ states[] = -+ { -+ { "Filter", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 45, filter_values }, -+ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 46, address_values }, -+ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 47, address_values }, -+ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 48, address_values }, -+ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 49 }, -+ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 50 }, -+ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 51, compare_func_values }, -+ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 4, 52 }, -+ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 53 }, -+ { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 54 }, -+ /* TODO: "Texture" field */ -+ }; -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); -+ struct replace_state_context replace_context; -+ struct hlsl_ir_node *node, *cast; -+ const struct state *state = NULL; -+ struct hlsl_ctx *ctx = fx->ctx; -+ struct hlsl_type *state_type; -+ unsigned int i; -+ bool progress; -+ -+ for (i = 0; i < ARRAY_SIZE(states); ++i) -+ { -+ if (type->class == states[i].container -+ && !ascii_strcasecmp(entry->name, states[i].name)) -+ { -+ state = &states[i]; -+ break; -+ } -+ } -+ -+ if (!state) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unrecognized state name %s.", entry->name); -+ return; -+ } -+ -+ if (entry->args_count != 1) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unrecognized initializer for the state %s.", -+ entry->name); -+ return; -+ } -+ -+ entry->name_id = state->id; -+ -+ replace_context.values = state->values; -+ replace_context.var = var; -+ -+ /* Turned named constants to 
actual constants. */ -+ hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); -+ -+ if (state->dimx) -+ state_type = hlsl_get_vector_type(ctx, state->type, state->dimx); -+ else -+ state_type = hlsl_get_scalar_type(ctx, state->type); -+ -+ /* Cast to expected property type. */ -+ node = entry->args->node; -+ if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) -+ return; -+ list_add_after(&node->entry, &cast->entry); -+ -+ hlsl_src_remove(entry->args); -+ hlsl_src_from_node(entry->args, cast); -+ -+ do -+ { -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); -+ } while (progress); -+} -+ -+static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i, j; -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t count_offset, count; -+ -+ for (i = 0; i < elements_count; ++i) -+ { -+ struct hlsl_state_block *block; -+ -+ count_offset = put_u32(buffer, 0); -+ -+ count = 0; -+ if (var->state_blocks) -+ { -+ block = var->state_blocks[i]; -+ -+ for (j = 0; j < block->count; ++j) -+ { -+ struct hlsl_state_block_entry *entry = block->entries[j]; -+ -+ /* Skip if property is reassigned later. This will use the last assignment. */ -+ if (state_block_contains_state(entry->name, j + 1, block)) -+ continue; -+ -+ /* Resolve special constant names and property names. 
*/ -+ resolve_fx_4_state_block_values(var, entry, fx); -+ -+ write_fx_4_state_assignment(var, entry, fx); -+ ++count; -+ } -+ } -+ -+ set_u32(buffer, count_offset, count); -+ } -+} -+ - static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) - { - const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); -@@ -1044,29 +1344,35 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - switch (type->class) - { - case HLSL_CLASS_RENDER_TARGET_VIEW: -+ fx->rtv_count += elements_count; -+ break; - case HLSL_CLASS_TEXTURE: -+ fx->texture_count += elements_count; -+ break; - case HLSL_CLASS_UAV: -+ fx->uav_count += elements_count; - break; - -- case HLSL_CLASS_OBJECT: -- switch (type->base_type) -- { -- case HLSL_TYPE_PIXELSHADER: -- case HLSL_TYPE_VERTEXSHADER: -- /* FIXME: write shader blobs, once parser support works. */ -- for (i = 0; i < elements_count; ++i) -- put_u32(buffer, 0); -- ++fx->shader_variable_count; -- break; -- default: -- hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", -- type->base_type); -- } -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_VERTEX_SHADER: -+ /* FIXME: write shader blobs, once parser support works. 
*/ -+ for (i = 0; i < elements_count; ++i) -+ put_u32(buffer, 0); -+ fx->shader_count += elements_count; -+ break; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ fx->dsv_count += elements_count; -+ break; -+ -+ case HLSL_CLASS_SAMPLER: -+ write_fx_4_state_object_initializer(var, fx); -+ fx->sampler_state_count += elements_count; - break; - - default: - hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", -- type->base_type); -+ type->e.numeric.type); - } - - put_u32(buffer, 0); /* Annotations count */ -@@ -1143,27 +1449,26 @@ static void write_buffers(struct fx_write_context *fx) - } - } - --static bool is_object_variable(const struct hlsl_ir_var *var) -+static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) - { - const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); - - switch (type->class) - { -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: -+ return true; - case HLSL_CLASS_UAV: -+ if (ctx->profile->major_version < 5) -+ return false; -+ if (type->e.resource.rasteriser_ordered) -+ return false; -+ return true; -+ case HLSL_CLASS_VERTEX_SHADER: - return true; -- -- case HLSL_CLASS_OBJECT: -- switch (type->base_type) -- { -- case HLSL_TYPE_PIXELSHADER: -- case HLSL_TYPE_VERTEXSHADER: -- return true; -- default: -- return false; -- } - - default: - return false; -@@ -1172,14 +1477,15 @@ static bool is_object_variable(const struct hlsl_ir_var *var) - - static void write_objects(struct fx_write_context *fx, bool shared) - { -+ struct hlsl_ctx *ctx = fx->ctx; - struct hlsl_ir_var *var; - - if (shared && !fx->child_effect) - return; - -- LIST_FOR_EACH_ENTRY(var, &fx->ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (!is_object_variable(var)) -+ if 
(!is_supported_object_variable(ctx, var)) - continue; - - if (fx->child_effect && (shared != !!(var->storage_modifiers & HLSL_STORAGE_SHARED))) -@@ -1216,14 +1522,14 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, fx.technique_count); - size_offset = put_u32(&buffer, 0); /* Unstructured size. */ - put_u32(&buffer, 0); /* String count. */ -- put_u32(&buffer, 0); /* Texture object count. */ -+ put_u32(&buffer, fx.texture_count); - put_u32(&buffer, 0); /* Depth stencil state count. */ - put_u32(&buffer, 0); /* Blend state count. */ - put_u32(&buffer, 0); /* Rasterizer state count. */ -- put_u32(&buffer, 0); /* Sampler state count. */ -- put_u32(&buffer, 0); /* Rendertarget view count. */ -- put_u32(&buffer, 0); /* Depth stencil view count. */ -- put_u32(&buffer, fx.shader_variable_count); /* Shader count. */ -+ put_u32(&buffer, fx.sampler_state_count); -+ put_u32(&buffer, fx.rtv_count); -+ put_u32(&buffer, fx.dsv_count); -+ put_u32(&buffer, fx.shader_count); - put_u32(&buffer, 0); /* Inline shader count. */ - - set_u32(&buffer, size_offset, fx.unstructured.size); -@@ -1274,17 +1580,17 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, fx.technique_count); - size_offset = put_u32(&buffer, 0); /* Unstructured size. */ - put_u32(&buffer, 0); /* String count. */ -- put_u32(&buffer, 0); /* Texture object count. */ -+ put_u32(&buffer, fx.texture_count); - put_u32(&buffer, 0); /* Depth stencil state count. */ - put_u32(&buffer, 0); /* Blend state count. */ - put_u32(&buffer, 0); /* Rasterizer state count. */ -- put_u32(&buffer, 0); /* Sampler state count. */ -- put_u32(&buffer, 0); /* Rendertarget view count. */ -- put_u32(&buffer, 0); /* Depth stencil view count. */ -- put_u32(&buffer, fx.shader_variable_count); /* Shader count. 
*/ -+ put_u32(&buffer, fx.sampler_state_count); -+ put_u32(&buffer, fx.rtv_count); -+ put_u32(&buffer, fx.dsv_count); -+ put_u32(&buffer, fx.shader_count); - put_u32(&buffer, 0); /* Inline shader count. */ - put_u32(&buffer, fx.group_count); /* Group count. */ -- put_u32(&buffer, 0); /* UAV count. */ -+ put_u32(&buffer, fx.uav_count); - put_u32(&buffer, 0); /* Interface variables count. */ - put_u32(&buffer, 0); /* Interface variable element count. */ - put_u32(&buffer, 0); /* Class instance elements count. */ -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 0b48b17d21c..96e73d23d72 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -136,7 +136,11 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) - - static void free_state_block_entry(struct hlsl_state_block_entry *entry) - { -+ unsigned int i; -+ - vkd3d_free(entry->name); -+ for (i = 0; i < entry->args_count; ++i) -+ hlsl_src_remove(&entry->args[i]); - vkd3d_free(entry->args); - hlsl_block_cleanup(entry->instrs); - vkd3d_free(entry->instrs); -@@ -365,11 +369,12 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: -- case HLSL_CLASS_OBJECT: - case HLSL_CLASS_PASS: -+ case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: - break; - } -@@ -416,7 +421,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e - return NULL; - } - type->class = type_class; -- type->base_type = base_type; -+ type->e.numeric.type = base_type; - type->dimx = dimx; - type->dimy = dimy; - hlsl_type_calculate_reg_size(ctx, type); -@@ -431,13 +436,14 @@ static bool type_is_single_component(const struct hlsl_type *type) - switch (type->class) - { - case 
HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_SCALAR: -- case HLSL_CLASS_OBJECT: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: - return true; - - case HLSL_CLASS_VECTOR: -@@ -475,7 +481,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - { - case HLSL_CLASS_VECTOR: - assert(index < type->dimx); -- *type_ptr = hlsl_get_scalar_type(ctx, type->base_type); -+ *type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type); - *index_ptr = 0; - return index; - -@@ -485,7 +491,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - bool row_major = hlsl_type_is_row_major(type); - - assert(index < type->dimx * type->dimy); -- *type_ptr = hlsl_get_vector_type(ctx, type->base_type, row_major ? type->dimx : type->dimy); -+ *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy); - *index_ptr = row_major ? x : y; - return row_major ? 
y : x; - } -@@ -572,12 +578,13 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - break; - - case HLSL_CLASS_DEPTH_STENCIL_VIEW: -- case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: - assert(idx == 0); - break; - -@@ -758,13 +765,13 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co - switch (type->class) - { - case HLSL_CLASS_VECTOR: -- return hlsl_get_scalar_type(ctx, type->base_type); -+ return hlsl_get_scalar_type(ctx, type->e.numeric.type); - - case HLSL_CLASS_MATRIX: - if (hlsl_type_is_row_major(type)) -- return hlsl_get_vector_type(ctx, type->base_type, type->dimx); -+ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); - else -- return hlsl_get_vector_type(ctx, type->base_type, type->dimy); -+ return hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimy); - - case HLSL_CLASS_ARRAY: - return type->e.array.type; -@@ -950,12 +957,13 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; - - case HLSL_CLASS_DEPTH_STENCIL_VIEW: -- case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: - return 1; - - case HLSL_CLASS_EFFECT_GROUP: -@@ -975,55 +983,73 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - - if (t1->class != t2->class) - return false; -- if (t1->base_type != t2->base_type) -- return false; -- if (t1->class == HLSL_CLASS_SAMPLER || t1->class == HLSL_CLASS_TEXTURE || t1->class == HLSL_CLASS_UAV) -- { -- if (t1->sampler_dim != t2->sampler_dim) -- return false; -- if ((t1->class 
== HLSL_CLASS_TEXTURE || t1->class == HLSL_CLASS_UAV) -- && t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC -- && !hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format)) -- return false; -- if (t1->class == HLSL_CLASS_UAV && t1->e.resource.rasteriser_ordered != t2->e.resource.rasteriser_ordered) -- return false; -- } -- if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) -- != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) -- return false; -- if (t1->dimx != t2->dimx) -- return false; -- if (t1->dimy != t2->dimy) -- return false; -- if (t1->class == HLSL_CLASS_STRUCT) -- { -- size_t i; - -- if (t1->e.record.field_count != t2->e.record.field_count) -- return false; -- -- for (i = 0; i < t1->e.record.field_count; ++i) -- { -- const struct hlsl_struct_field *field1 = &t1->e.record.fields[i]; -- const struct hlsl_struct_field *field2 = &t2->e.record.fields[i]; -+ switch (t1->class) -+ { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ if (t1->e.numeric.type != t2->e.numeric.type) -+ return false; -+ if ((t1->modifiers & HLSL_MODIFIER_ROW_MAJOR) -+ != (t2->modifiers & HLSL_MODIFIER_ROW_MAJOR)) -+ return false; -+ if (t1->dimx != t2->dimx) -+ return false; -+ if (t1->dimy != t2->dimy) -+ return false; -+ return true; - -- if (!hlsl_types_are_equal(field1->type, field2->type)) -+ case HLSL_CLASS_UAV: -+ if (t1->e.resource.rasteriser_ordered != t2->e.resource.rasteriser_ordered) -+ return false; -+ /* fall through */ -+ case HLSL_CLASS_TEXTURE: -+ if (t1->sampler_dim != HLSL_SAMPLER_DIM_GENERIC -+ && !hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format)) - return false; -+ /* fall through */ -+ case HLSL_CLASS_SAMPLER: -+ if (t1->sampler_dim != t2->sampler_dim) -+ return false; -+ return true; - -- if (strcmp(field1->name, field2->name)) -+ case HLSL_CLASS_STRUCT: -+ if (t1->e.record.field_count != t2->e.record.field_count) - return false; -- } -- } -- if (t1->class == HLSL_CLASS_ARRAY) -- return t1->e.array.elements_count == 
t2->e.array.elements_count -- && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); -- if (t1->class == HLSL_CLASS_TECHNIQUE) -- { -- if (t1->e.version != t2->e.version) -- return false; -+ -+ for (size_t i = 0; i < t1->e.record.field_count; ++i) -+ { -+ const struct hlsl_struct_field *field1 = &t1->e.record.fields[i]; -+ const struct hlsl_struct_field *field2 = &t2->e.record.fields[i]; -+ -+ if (!hlsl_types_are_equal(field1->type, field2->type)) -+ return false; -+ -+ if (strcmp(field1->name, field2->name)) -+ return false; -+ } -+ return true; -+ -+ case HLSL_CLASS_ARRAY: -+ return t1->e.array.elements_count == t2->e.array.elements_count -+ && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); -+ -+ case HLSL_CLASS_TECHNIQUE: -+ return t1->e.version == t2->e.version; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_EFFECT_GROUP: -+ case HLSL_CLASS_PASS: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_VOID: -+ return true; - } - -- return true; -+ vkd3d_unreachable(); - } - - struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, -@@ -1044,7 +1070,6 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, - } - } - type->class = old->class; -- type->base_type = old->base_type; - type->dimx = old->dimx; - type->dimy = old->dimy; - type->modifiers = old->modifiers | modifiers; -@@ -1056,6 +1081,12 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, - - switch (old->class) - { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ type->e.numeric.type = old->e.numeric.type; -+ break; -+ - case HLSL_CLASS_ARRAY: - if (!(type->e.array.type = hlsl_type_clone(ctx, old->e.array.type, default_majority, modifiers))) - { -@@ -1643,10 +1674,11 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned - - if 
(!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) - return NULL; -+ assert(hlsl_is_numeric_type(val->data_type)); - if (components == 1) -- type = hlsl_get_scalar_type(ctx, val->data_type->base_type); -+ type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); - else -- type = hlsl_get_vector_type(ctx, val->data_type->base_type, components); -+ type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, components); - init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); - hlsl_src_from_node(&swizzle->val, val); - swizzle->swizzle = s; -@@ -1709,7 +1741,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v - if (type->class == HLSL_CLASS_TEXTURE || type->class == HLSL_CLASS_UAV) - type = type->e.resource.format; - else if (type->class == HLSL_CLASS_MATRIX) -- type = hlsl_get_vector_type(ctx, type->base_type, type->dimx); -+ type = hlsl_get_vector_type(ctx, type->e.numeric.type, type->dimx); - else - type = hlsl_get_element_type_from_path_index(ctx, type, idx); - -@@ -2295,18 +2327,18 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - switch (type->class) - { - case HLSL_CLASS_SCALAR: -- assert(type->base_type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(string, "%s", base_types[type->base_type]); -+ assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]); - return string; - - case HLSL_CLASS_VECTOR: -- assert(type->base_type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(string, "%s%u", base_types[type->base_type], type->dimx); -+ assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); - return string; - - case HLSL_CLASS_MATRIX: -- assert(type->base_type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->base_type], type->dimy, type->dimx); -+ 
assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); - return string; - - case HLSL_CLASS_ARRAY: -@@ -2343,7 +2375,8 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - return string; - } - -- assert(type->e.resource.format->base_type < ARRAY_SIZE(base_types)); -+ assert(hlsl_is_numeric_type(type->e.resource.format)); -+ assert(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); - if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) - { - vkd3d_string_buffer_printf(string, "Buffer"); -@@ -2376,12 +2409,13 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: -- case HLSL_CLASS_OBJECT: - case HLSL_CLASS_PASS: -+ case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: - break; - } -@@ -2665,7 +2699,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl - { - const union hlsl_constant_value_component *value = &constant->value.u[x]; - -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - vkd3d_string_buffer_printf(buffer, "%s ", value->u ? 
"true" : "false"); -@@ -3557,8 +3591,6 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - {"dword", HLSL_CLASS_SCALAR, HLSL_TYPE_UINT, 1, 1}, - {"vector", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, - {"matrix", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, -- {"pixelshader", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, -- {"vertexshader", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, - }; - - static const struct -@@ -3682,9 +3714,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "vertexshader", HLSL_CLASS_VERTEX_SHADER)); - - for (i = 0; i < ARRAY_SIZE(effect_types); ++i) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index c3a4c6bd291..a89e43f9bf2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -78,16 +78,17 @@ enum hlsl_type_class - HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX, - HLSL_CLASS_STRUCT, - HLSL_CLASS_ARRAY, -- HLSL_CLASS_OBJECT, - HLSL_CLASS_DEPTH_STENCIL_VIEW, - HLSL_CLASS_EFFECT_GROUP, - HLSL_CLASS_PASS, -+ HLSL_CLASS_PIXEL_SHADER, - HLSL_CLASS_RENDER_TARGET_VIEW, - HLSL_CLASS_SAMPLER, - HLSL_CLASS_STRING, - HLSL_CLASS_TECHNIQUE, - HLSL_CLASS_TEXTURE, - 
HLSL_CLASS_UAV, -+ HLSL_CLASS_VERTEX_SHADER, - HLSL_CLASS_VOID, - }; - -@@ -100,8 +101,6 @@ enum hlsl_base_type - HLSL_TYPE_UINT, - HLSL_TYPE_BOOL, - HLSL_TYPE_LAST_SCALAR = HLSL_TYPE_BOOL, -- HLSL_TYPE_PIXELSHADER, -- HLSL_TYPE_VERTEXSHADER, - }; - - enum hlsl_sampler_dim -@@ -143,10 +142,6 @@ struct hlsl_type - struct rb_entry scope_entry; - - enum hlsl_type_class class; -- /* If class is <= HLSL_CLASS_LAST_NUMERIC, then base_type is <= HLSL_TYPE_LAST_SCALAR. -- * If class is HLSL_CLASS_OBJECT, then base_type is > HLSL_TYPE_LAST_SCALAR. -- * Otherwise, base_type is not used. */ -- enum hlsl_base_type base_type; - - /* If class is HLSL_CLASS_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. - * If class is HLSL_CLASS_TEXTURE, then sampler_dim can be any value of the enum except -@@ -177,6 +172,11 @@ struct hlsl_type - - union - { -+ /* Additional information if type is numeric. */ -+ struct -+ { -+ enum hlsl_base_type type; -+ } numeric; - /* Additional information if type is HLSL_CLASS_STRUCT. */ - struct - { -@@ -474,6 +474,8 @@ struct hlsl_state_block_entry - { - /* For assignments, the name in the lhs. */ - char *name; -+ /* Resolved format-specific property identifier. */ -+ unsigned int name_id; - - /* Whether the lhs in the assignment is indexed and, in that case, its index. */ - bool lhs_has_index; -@@ -483,7 +485,7 @@ struct hlsl_state_block_entry - struct hlsl_block *instrs; - - /* For assignments, arguments of the rhs initializer. 
*/ -- struct hlsl_ir_node **args; -+ struct hlsl_src *args; - unsigned int args_count; - }; - -@@ -1400,7 +1402,6 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int - bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); - - void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx); --void hlsl_prepend_global_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block); - - const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); - unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 0eed15c5a91..79317bb0545 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -1352,9 +1352,6 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - - static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) - { -- if (t1->base_type > HLSL_TYPE_LAST_SCALAR || t2->base_type > HLSL_TYPE_LAST_SCALAR) -- return false; -- - /* Scalar vars can be converted to pretty much everything */ - if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) - return true; -@@ -1386,10 +1383,6 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t - - static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hlsl_base_type t2) - { -- if (t1 > HLSL_TYPE_LAST_SCALAR || t2 > HLSL_TYPE_LAST_SCALAR) { -- FIXME("Unexpected base type.\n"); -- return HLSL_TYPE_FLOAT; -- } - if (t1 == t2) - return t1 == HLSL_TYPE_BOOL ? 
HLSL_TYPE_INT : t1; - if (t1 == HLSL_TYPE_DOUBLE || t2 == HLSL_TYPE_DOUBLE) -@@ -1493,7 +1486,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl - struct hlsl_ir_node *load; - struct hlsl_ir_var *var; - -- scalar_type = hlsl_get_scalar_type(ctx, type->base_type); -+ scalar_type = hlsl_get_scalar_type(ctx, type->e.numeric.type); - - if (!(var = hlsl_new_synthetic_var(ctx, "split_op", type, loc))) - return NULL; -@@ -1543,7 +1536,7 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * - const struct hlsl_type *type = instr->data_type; - struct vkd3d_string_buffer *string; - -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: -@@ -1593,13 +1586,13 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct - static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, - const struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); - enum hlsl_type_class type; -+ enum hlsl_base_type base; - unsigned int dimx, dimy; - - if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) - return NULL; -- -+ base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); - return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - } - -@@ -1636,14 +1629,15 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str - const struct vkd3d_shader_location *loc) - { - struct hlsl_type *common_type, *return_type; -- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); - enum hlsl_type_class type; -+ enum hlsl_base_type base; - unsigned int dimx, dimy; - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; - - if (!expr_common_shape(ctx, 
arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) - return NULL; - -+ base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); - common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -@@ -1683,7 +1677,7 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct h - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type base = arg1->data_type->base_type; -+ enum hlsl_base_type base = arg1->data_type->e.numeric.type; - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_type *return_type, *integer_type; - enum hlsl_type_class type; -@@ -1713,7 +1707,7 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct h - static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); -+ enum hlsl_base_type base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_type *common_type, *ret_type; - enum hlsl_ir_expr_op op; -@@ -1964,7 +1958,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - "Resource store expressions must write to all components."); - - assert(coords->data_type->class == HLSL_CLASS_VECTOR); -- assert(coords->data_type->base_type == HLSL_TYPE_UINT); -+ assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); - assert(coords->data_type->dimx == dim_count); - - if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) -@@ -2603,7 +2597,7 @@ static struct 
hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, - { - struct hlsl_type *type = arg->data_type; - -- if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) -+ if (type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF) - return arg; - - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -@@ -2630,7 +2624,7 @@ static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *p - static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type base = params->args[0]->data_type->base_type; -+ enum hlsl_base_type base = params->args[0]->data_type->e.numeric.type; - bool vectors = false, matrices = false; - unsigned int dimx = 4, dimy = 4; - struct hlsl_type *common_type; -@@ -2640,7 +2634,7 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * - { - struct hlsl_type *arg_type = params->args[i]->data_type; - -- base = expr_common_base_type(base, arg_type->base_type); -+ base = expr_common_base_type(base, arg_type->e.numeric.type); - - if (arg_type->class == HLSL_CLASS_VECTOR) - { -@@ -2697,7 +2691,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) - return false; - -- base_type = type->base_type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; -+ base_type = type->e.numeric.type == HLSL_TYPE_HALF ? 
HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; - type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); - - return convert_args(ctx, params, type, loc); -@@ -2921,7 +2915,7 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, - struct hlsl_type *data_type; - - data_type = params->args[0]->data_type; -- if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) -+ if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) - { - struct vkd3d_string_buffer *string; - -@@ -2957,7 +2951,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, - } - - data_type = params->args[0]->data_type; -- if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) -+ if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) - { - struct vkd3d_string_buffer *string; - -@@ -3086,7 +3080,7 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, - struct hlsl_type *cast_type; - enum hlsl_base_type base; - -- if (arg1->data_type->base_type == HLSL_TYPE_HALF && arg2->data_type->base_type == HLSL_TYPE_HALF) -+ if (arg1->data_type->e.numeric.type == HLSL_TYPE_HALF && arg2->data_type->e.numeric.type == HLSL_TYPE_HALF) - base = HLSL_TYPE_HALF; - else - base = HLSL_TYPE_FLOAT; -@@ -3267,7 +3261,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, - return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); - } - -- typename = type->base_type == HLSL_TYPE_HALF ? "half" : "float"; -+ typename = type->e.numeric.type == HLSL_TYPE_HALF ? 
"half" : "float"; - template = templates[dim]; - - switch (dim) -@@ -3621,7 +3615,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1], *cast1, *cast2; -- enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); -+ enum hlsl_base_type base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type); - struct hlsl_type *cast_type1 = arg1->data_type, *cast_type2 = arg2->data_type, *matrix_type, *ret_type; - unsigned int i, j, k, vect_count = 0; - struct hlsl_deref var_deref; -@@ -3824,7 +3818,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, - if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) - return false; - -- base = expr_common_base_type(res_type->base_type, i_type->base_type); -+ base = expr_common_base_type(res_type->e.numeric.type, i_type->e.numeric.type); - base = base == HLSL_TYPE_HALF ? 
HLSL_TYPE_HALF : HLSL_TYPE_FLOAT; - res_type = convert_numeric_type(ctx, res_type, base); - idx_type = convert_numeric_type(ctx, i_type, base); -@@ -3884,7 +3878,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, - struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, - arg->data_type->dimx, arg->data_type->dimy); - -- if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) -+ if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type), &zero_value, loc))) - return false; - hlsl_block_add_instr(params->instrs, zero); - -@@ -4257,7 +4251,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - return true; - } - -- mat_type = hlsl_get_matrix_type(ctx, arg_type->base_type, arg_type->dimy, arg_type->dimx); -+ mat_type = hlsl_get_matrix_type(ctx, arg_type->e.numeric.type, arg_type->dimy, arg_type->dimx); - - if (!(var = hlsl_new_synthetic_var(ctx, "transpose", mat_type, loc))) - return false; -@@ -4553,7 +4547,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - if (common_type->dimx == 1 && common_type->dimy == 1) - { - common_type = hlsl_get_numeric_type(ctx, cond_type->class, -- common_type->base_type, cond_type->dimx, cond_type->dimy); -+ common_type->e.numeric.type, cond_type->dimx, cond_type->dimy); - } - else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy) - { -@@ -4603,7 +4597,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - common_type = first->data_type; - } - -- assert(cond->data_type->base_type == HLSL_TYPE_BOOL); -+ assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); - - args[0] = cond; - args[1] = first; -@@ -4926,7 +4920,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *bloc - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; - -- load_params.format = 
hlsl_get_vector_type(ctx, object_type->e.resource.format->base_type, 4); -+ load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource.format->e.numeric.type, 4); - load_params.resource = object; - load_params.sampler = params->args[0]; - -@@ -6585,7 +6579,7 @@ type_no_void: - YYABORT; - } - -- $$ = hlsl_type_clone(ctx, hlsl_get_vector_type(ctx, $3->base_type, $5), 0, 0); -+ $$ = hlsl_type_clone(ctx, hlsl_get_vector_type(ctx, $3->e.numeric.type, $5), 0, 0); - $$->is_minimum_precision = $3->is_minimum_precision; - } - | KW_VECTOR -@@ -6618,7 +6612,7 @@ type_no_void: - YYABORT; - } - -- $$ = hlsl_type_clone(ctx, hlsl_get_matrix_type(ctx, $3->base_type, $7, $5), 0, 0); -+ $$ = hlsl_type_clone(ctx, hlsl_get_matrix_type(ctx, $3->e.numeric.type, $7, $5), 0, 0); - $$->is_minimum_precision = $3->is_minimum_precision; - } - | KW_MATRIX -@@ -6918,6 +6912,7 @@ state_block: - | state_block stateblock_lhs_identifier state_block_index_opt '=' complex_initializer ';' - { - struct hlsl_state_block_entry *entry; -+ unsigned int i; - - if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) - YYABORT; -@@ -6927,8 +6922,13 @@ state_block: - entry->lhs_index = $3.index; - - entry->instrs = $5.instrs; -- entry->args = $5.args; -+ - entry->args_count = $5.args_count; -+ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) -+ YYABORT; -+ for (i = 0; i < entry->args_count; ++i) -+ hlsl_src_from_node(&entry->args[i], $5.args[i]); -+ vkd3d_free($5.args); - - $$ = $1; - state_block_add_entry($$, entry); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 94acb70fff9..8882deaf6cd 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -263,8 +263,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls - if (type1->dimx != type2->dimx) - return false; - -- return base_type_get_semantic_equivalent(type1->base_type) -- == 
base_type_get_semantic_equivalent(type2->base_type); -+ return base_type_get_semantic_equivalent(type1->e.numeric.type) -+ == base_type_get_semantic_equivalent(type2->e.numeric.type); - } - - static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -@@ -355,10 +355,10 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - if (!semantic->name) - return; - -- vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); -+ vector_type_dst = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); - vector_type_src = vector_type_dst; - if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) -- vector_type_src = hlsl_get_vector_type(ctx, type->base_type, 4); -+ vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); - - for (i = 0; i < hlsl_type_major_size(type); ++i) - { -@@ -500,7 +500,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - if (!semantic->name) - return; - -- vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); -+ vector_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); - - for (i = 0; i < hlsl_type_major_size(type); ++i) - { -@@ -1101,7 +1101,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - struct hlsl_ir_node *resource_load; - - assert(coords->data_type->class == HLSL_CLASS_VECTOR); -- assert(coords->data_type->base_type == HLSL_TYPE_UINT); -+ assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); - assert(coords->data_type->dimx == dim_count); - - if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) -@@ -1191,7 +1191,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s - { - struct hlsl_ir_node *new_cast, *swizzle; - -- dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->base_type); -+ dst_scalar_type = 
hlsl_get_scalar_type(ctx, dst_type->e.numeric.type); - /* We need to preserve the cast since it might be doing more than just - * turning the scalar into a vector. */ - if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_scalar_type, &cast->node.loc))) -@@ -1625,10 +1625,11 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - { - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: -- case HLSL_CLASS_OBJECT: -+ case HLSL_CLASS_VERTEX_SHADER: - break; - - case HLSL_CLASS_MATRIX: -@@ -2064,7 +2065,7 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - src_type = expr->operands[0].node->data_type; - - if (hlsl_types_are_equal(src_type, dst_type) -- || (src_type->base_type == dst_type->base_type && is_vec1(src_type) && is_vec1(dst_type))) -+ || (src_type->e.numeric.type == dst_type->e.numeric.type && is_vec1(src_type) && is_vec1(dst_type))) - { - hlsl_replace_node(&expr->node, expr->operands[0].node); - return true; -@@ -2191,7 +2192,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - type = rhs->data_type; - if (type->class != HLSL_CLASS_MATRIX) - return false; -- element_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); -+ element_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); - - if (rhs->type != HLSL_IR_LOAD) - { -@@ -2228,7 +2229,7 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins - { - struct hlsl_ir_node *new_cast, *swizzle; - -- dst_vector_type = hlsl_get_vector_type(ctx, dst_type->base_type, src_type->dimx); -+ dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->dimx); - /* We need to preserve the cast since it might be doing more than just - * narrowing the vector. 
*/ - if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) -@@ -2482,7 +2483,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir - - op = HLSL_OP2_DOT; - if (type->dimx == 1) -- op = type->base_type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; -+ op = type->e.numeric.type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; - - /* Note: We may be creating a DOT for bool vectors here, which we need to lower to - * LOGIC_OR + LOGIC_AND. */ -@@ -2676,9 +2677,9 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return false; - - arg = expr->operands[0].node; -- if (instr->data_type->base_type != HLSL_TYPE_INT && instr->data_type->base_type != HLSL_TYPE_UINT) -+ if (instr->data_type->e.numeric.type != HLSL_TYPE_INT && instr->data_type->e.numeric.type != HLSL_TYPE_UINT) - return false; -- if (arg->data_type->base_type != HLSL_TYPE_FLOAT && arg->data_type->base_type != HLSL_TYPE_HALF) -+ if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF) - return false; - - if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc))) -@@ -2935,7 +2936,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st - float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); - - /* If this is happens, it means we failed to cast the argument to boolean somewhere. 
*/ -- assert(arg->data_type->base_type == HLSL_TYPE_BOOL); -+ assert(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); - - if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &arg->loc))) - return false; -@@ -2991,7 +2992,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - return false; - } - -- assert(cond->data_type->base_type == HLSL_TYPE_BOOL); -+ assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); - - type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, - instr->data_type->dimx, instr->data_type->dimy); -@@ -3285,7 +3286,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - arg_type = expr->operands[0].node->data_type; - if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) - return false; -- if (type->base_type != HLSL_TYPE_BOOL) -+ if (type->e.numeric.type != HLSL_TYPE_BOOL) - return false; - - /* Narrowing casts should have already been lowered. */ -@@ -3313,7 +3314,7 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc - - assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); - -- if (cond_type->base_type != HLSL_TYPE_BOOL) -+ if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) - { - cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy); - -@@ -3349,7 +3350,7 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return false; - if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; -- if (type->base_type != HLSL_TYPE_INT) -+ if (type->e.numeric.type != HLSL_TYPE_INT) - return false; - utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); - -@@ -3415,7 +3416,7 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return false; - if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - 
return false; -- if (type->base_type != HLSL_TYPE_INT) -+ if (type->e.numeric.type != HLSL_TYPE_INT) - return false; - utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); - -@@ -3474,7 +3475,7 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - return false; - if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; -- if (type->base_type != HLSL_TYPE_INT) -+ if (type->e.numeric.type != HLSL_TYPE_INT) - return false; - - arg = expr->operands[0].node; -@@ -3505,14 +3506,14 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - if (expr->op != HLSL_OP2_DOT) - return false; - -- if (type->base_type == HLSL_TYPE_INT || type->base_type == HLSL_TYPE_UINT -- || type->base_type == HLSL_TYPE_BOOL) -+ if (type->e.numeric.type == HLSL_TYPE_INT || type->e.numeric.type == HLSL_TYPE_UINT -+ || type->e.numeric.type == HLSL_TYPE_BOOL) - { - arg1 = expr->operands[0].node; - arg2 = expr->operands[1].node; - assert(arg1->data_type->dimx == arg2->data_type->dimx); - dimx = arg1->data_type->dimx; -- is_bool = type->base_type == HLSL_TYPE_BOOL; -+ is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; - - if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? 
HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) - return false; -@@ -3558,7 +3559,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return false; - if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; -- if (type->base_type != HLSL_TYPE_FLOAT) -+ if (type->e.numeric.type != HLSL_TYPE_FLOAT) - return false; - btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); - -@@ -3614,7 +3615,7 @@ static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - if (instr->type != HLSL_IR_EXPR) - return false; - expr = hlsl_ir_expr(instr); -- if (expr->op == HLSL_OP1_CAST || instr->data_type->base_type == HLSL_TYPE_FLOAT) -+ if (expr->op == HLSL_OP1_CAST || instr->data_type->e.numeric.type == HLSL_TYPE_FLOAT) - return false; - - switch (expr->op) -@@ -4453,7 +4454,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - continue; - value = &constant->value.u[i++]; - -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - f = !!value->u; -@@ -5046,7 +5047,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - - /* We should always have generated a cast to UINT. */ - assert(path_node->data_type->class == HLSL_CLASS_SCALAR -- && path_node->data_type->base_type == HLSL_TYPE_UINT); -+ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - - idx = hlsl_ir_constant(path_node)->value.u[0].u; - -@@ -5122,7 +5123,7 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref - - /* We should always have generated a cast to UINT. 
*/ - assert(path_node->data_type->class == HLSL_CLASS_SCALAR -- && path_node->data_type->base_type == HLSL_TYPE_UINT); -+ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - - idx = hlsl_ir_constant(path_node)->value.u[0].u; - -@@ -5162,7 +5163,7 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref - { - /* We should always have generated a cast to UINT. */ - assert(offset_node->data_type->class == HLSL_CLASS_SCALAR -- && offset_node->data_type->base_type == HLSL_TYPE_UINT); -+ && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - assert(offset_node->type != HLSL_IR_CONSTANT); - return false; - } -@@ -5229,7 +5230,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a - const struct hlsl_ir_constant *constant; - - if (type->class != HLSL_CLASS_SCALAR -- || (type->base_type != HLSL_TYPE_INT && type->base_type != HLSL_TYPE_UINT)) -+ || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) - { - struct vkd3d_string_buffer *string; - -@@ -5248,8 +5249,8 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a - } - constant = hlsl_ir_constant(instr); - -- if ((type->base_type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) -- || (type->base_type == HLSL_TYPE_UINT && !constant->value.u[0].u)) -+ if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) -+ || (type->e.numeric.type == HLSL_TYPE_UINT && !constant->value.u[0].u)) - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, - "Thread count must be a positive integer."); - -@@ -5313,17 +5314,6 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod - } - } - --void hlsl_prepend_global_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *body) --{ -- struct hlsl_ir_var *var; -- -- LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) -- { -- if (var->storage_modifiers & 
HLSL_STORAGE_UNIFORM) -- prepend_uniform_copy(ctx, body, var); -- } --} -- - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) - { -@@ -5352,7 +5342,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_matrix_swizzles, body); - lower_ir(ctx, lower_index_loads, body); - -- hlsl_prepend_global_uniform_copy(ctx, body); -+ LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -+ prepend_uniform_copy(ctx, body, var); -+ } - - for (i = 0; i < entry_func->parameters.count; ++i) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 51f2f9cc050..16015fa8a81 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -25,10 +25,10 @@ - static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -116,10 +116,10 @@ static int32_t double_to_int(double x) - static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -158,7 +158,7 @@ static bool 
fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src->node.data_type->base_type) -+ switch (src->node.data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -200,7 +200,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - vkd3d_unreachable(); - } - -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -231,10 +231,10 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -257,10 +257,10 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -283,10 +283,10 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == 
src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -309,11 +309,11 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - float i; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -336,10 +336,10 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -381,10 +381,10 @@ static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -415,10 +415,10 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type 
*dst_type, const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -439,10 +439,10 @@ static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -484,10 +484,10 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -524,10 +524,10 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < 
dst_type->dimx; ++k) - { -@@ -550,10 +550,10 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->base_type); -+ assert(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -595,11 +595,11 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -632,11 +632,11 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -659,11 +659,11 @@ 
static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -686,11 +686,11 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const - static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -712,11 +712,11 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - 
assert(src1->node.data_type->dimx == src2->node.data_type->dimx); - - dst->u[0].f = 0.0f; -@@ -740,12 +740,12 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -- assert(type == src3->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); -+ assert(type == src3->node.data_type->e.numeric.type); - assert(src1->node.data_type->dimx == src2->node.data_type->dimx); - assert(src3->node.data_type->dimx == 1); - -@@ -771,11 +771,11 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, - const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -841,12 +841,12 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co - { - unsigned int k; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->e.numeric.type == 
src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -877,12 +877,12 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -916,12 +916,12 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - { - unsigned int k; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -955,14 +955,14 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->base_type == src1->node.data_type->base_type); -- assert(src2->node.data_type->base_type == HLSL_TYPE_INT); -+ assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -+ assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); - - for (k = 0; k < dst_type->dimx; ++k) - { - unsigned int shift = src2->value.u[k].u % 32; - -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { 
- case HLSL_TYPE_INT: - dst->u[k].i = src1->value.u[k].i << shift; -@@ -983,11 +983,11 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1021,11 +1021,11 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1060,11 +1060,11 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, - const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == 
src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1102,11 +1102,11 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; -+ enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->e.numeric.type); -+ assert(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1139,12 +1139,12 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -1175,9 +1175,9 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - { - unsigned int k; - -- assert(dst_type->base_type == src2->node.data_type->base_type); -- assert(dst_type->base_type == src3->node.data_type->base_type); -- assert(src1->node.data_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ assert(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); -+ assert(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); - - for (k = 0; k < dst_type->dimx; ++k) - dst->u[k] = src1->value.u[k].u ? 
src2->value.u[k] : src3->value.u[k]; -@@ -1190,14 +1190,14 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->base_type == src1->node.data_type->base_type); -- assert(src2->node.data_type->base_type == HLSL_TYPE_INT); -+ assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -+ assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); - - for (k = 0; k < dst_type->dimx; ++k) - { - unsigned int shift = src2->value.u[k].u % 32; - -- switch (src1->node.data_type->base_type) -+ switch (src1->node.data_type->e.numeric.type) - { - case HLSL_TYPE_INT: - dst->u[k].i = src1->value.u[k].i >> shift; -@@ -1403,7 +1403,7 @@ static bool constant_is_zero(struct hlsl_ir_constant *const_arg) - - for (k = 0; k < data_type->dimx; ++k) - { -- switch (data_type->base_type) -+ switch (data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -1437,7 +1437,7 @@ static bool constant_is_one(struct hlsl_ir_constant *const_arg) - - for (k = 0; k < data_type->dimx; ++k) - { -- switch (data_type->base_type) -+ switch (data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 121b0fe3a6c..c6ecbdd9e46 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -5753,6 +5753,32 @@ static void vsir_validate_instruction(struct validation_context *ctx) - case VKD3DSIH_DCL_OUTPUT: - return; - -+ case VKD3DSIH_DCL_INPUT_PRIMITIVE: -+ if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED -+ || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", -+ instruction->declaration.primitive_type.type); -+ return; -+ -+ case VKD3DSIH_DCL_VERTICES_OUT: -+ if (instruction->declaration.count > 1024) -+ validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", -+ instruction->declaration.count); -+ return; -+ -+ case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: -+ if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED -+ || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", -+ instruction->declaration.primitive_type.type); -+ return; -+ -+ case VKD3DSIH_DCL_GS_INSTANCES: -+ if (!instruction->declaration.count || instruction->declaration.count > 32) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", -+ instruction->declaration.count); -+ return; -+ - case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: - if (!instruction->declaration.count || instruction->declaration.count > 32) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.", -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index dc9e8c06a5e..813e20fdcd7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -1752,6 +1752,14 @@ static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *bu - return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); - } - -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); -+ return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpGroupNonUniformBallot, -+ result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); -+} -+ - static uint32_t vkd3d_spirv_build_op_glsl_std450_tr1(struct vkd3d_spirv_builder *builder, - enum GLSLstd450 op, uint32_t result_type, uint32_t operand) - { -@@ -9793,6 +9801,76 @@ static 
void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } - -+static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, val_id; -+ -+ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, VKD3D_VEC4_SIZE); -+ val_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_ballot(builder, type_id, val_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode handler_idx, bool is_float) -+{ -+ switch (handler_idx) -+ { -+ case VKD3DSIH_WAVE_ACTIVE_BIT_AND: -+ return SpvOpGroupNonUniformBitwiseAnd; -+ case VKD3DSIH_WAVE_ACTIVE_BIT_OR: -+ return SpvOpGroupNonUniformBitwiseOr; -+ case VKD3DSIH_WAVE_ACTIVE_BIT_XOR: -+ return SpvOpGroupNonUniformBitwiseXor; -+ case VKD3DSIH_WAVE_OP_ADD: -+ return is_float ? SpvOpGroupNonUniformFAdd : SpvOpGroupNonUniformIAdd; -+ case VKD3DSIH_WAVE_OP_IMAX: -+ return SpvOpGroupNonUniformSMax; -+ case VKD3DSIH_WAVE_OP_IMIN: -+ return SpvOpGroupNonUniformSMin; -+ case VKD3DSIH_WAVE_OP_MAX: -+ return SpvOpGroupNonUniformFMax; -+ case VKD3DSIH_WAVE_OP_MIN: -+ return SpvOpGroupNonUniformFMin; -+ case VKD3DSIH_WAVE_OP_MUL: -+ return is_float ? 
SpvOpGroupNonUniformFMul : SpvOpGroupNonUniformIMul; -+ case VKD3DSIH_WAVE_OP_UMAX: -+ return SpvOpGroupNonUniformUMax; -+ case VKD3DSIH_WAVE_OP_UMIN: -+ return SpvOpGroupNonUniformUMin; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, val_id; -+ SpvOp op; -+ -+ op = map_wave_alu_op(instruction->handler_idx, data_type_is_floating_point(src->reg.data_type)); -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); -+ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformArithmetic); -+ val_id = vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, op, type_id, -+ vkd3d_spirv_get_op_scope_subgroup(builder), -+ (instruction->flags & VKD3DSI_WAVE_PREFIX) ? SpvGroupOperationExclusiveScan : SpvGroupOperationReduce, -+ val_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ - /* This function is called after declarations are processed. 
*/ - static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) - { -@@ -10142,6 +10220,22 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_WAVE_ANY_TRUE: - spirv_compiler_emit_wave_bool_op(compiler, instruction); - break; -+ case VKD3DSIH_WAVE_ACTIVE_BALLOT: -+ spirv_compiler_emit_wave_active_ballot(compiler, instruction); -+ break; -+ case VKD3DSIH_WAVE_ACTIVE_BIT_AND: -+ case VKD3DSIH_WAVE_ACTIVE_BIT_OR: -+ case VKD3DSIH_WAVE_ACTIVE_BIT_XOR: -+ case VKD3DSIH_WAVE_OP_ADD: -+ case VKD3DSIH_WAVE_OP_IMAX: -+ case VKD3DSIH_WAVE_OP_IMIN: -+ case VKD3DSIH_WAVE_OP_MAX: -+ case VKD3DSIH_WAVE_OP_MIN: -+ case VKD3DSIH_WAVE_OP_MUL: -+ case VKD3DSIH_WAVE_OP_UMAX: -+ case VKD3DSIH_WAVE_OP_UMIN: -+ spirv_compiler_emit_wave_alu_op(compiler, instruction); -+ break; - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: - case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 708ab6268a7..4e3bef9640c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -2744,7 +2744,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc - - static bool type_is_integer(const struct hlsl_type *type) - { -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: -@@ -2933,7 +2933,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, usage_idx); - put_u32(&buffer, usage); -- switch (var->data_type->base_type) -+ switch (var->data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -@@ -3009,14 +3009,15 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_STRUCT: -- case HLSL_CLASS_OBJECT: - case HLSL_CLASS_PASS: -+ 
case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: - break; - } -@@ -3025,7 +3026,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - - static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) - { -- switch (type->base_type) -+ switch (type->e.numeric.type) - { - case HLSL_TYPE_BOOL: - return D3D_SVT_BOOL; -@@ -3089,7 +3090,7 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - - put_u32(buffer, field->name_bytecode_offset); - put_u32(buffer, field->type->bytecode_offset); -- put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); -+ put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float)); - } - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID)); - put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type))); -@@ -3139,7 +3140,7 @@ static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type - if (type->class == HLSL_CLASS_ARRAY) - return sm4_resource_format(type->e.array.type); - -- switch (type->e.resource.format->base_type) -+ switch (type->e.resource.format->e.numeric.type) - { - case HLSL_TYPE_DOUBLE: - return D3D_RETURN_TYPE_DOUBLE; -@@ -4727,11 +4728,11 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl - const struct hlsl_ir_node *dst = &load->node; - struct sm4_instruction instr; - -- assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); -+ assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -- if (dst->data_type->base_type == HLSL_TYPE_UINT) -+ if 
(dst->data_type->e.numeric.type == HLSL_TYPE_UINT) - instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - - sm4_dst_from_node(&instr.dsts[0], dst); -@@ -4756,11 +4757,11 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir - return; - } - -- assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); -+ assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_RESINFO; -- if (dst->data_type->base_type == HLSL_TYPE_UINT) -+ if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) - instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - - sm4_dst_from_node(&instr.dsts[0], dst); -@@ -4775,7 +4776,7 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir - - static bool type_is_float(const struct hlsl_type *type) - { -- return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; -+ return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; - } - - static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, -@@ -4812,11 +4813,11 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex - /* Narrowing casts were already lowered. 
*/ - assert(src_type->dimx == dst_type->dimx); - -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -@@ -4845,7 +4846,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_TYPE_INT: -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -@@ -4871,7 +4872,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_TYPE_UINT: -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -@@ -4941,7 +4942,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - switch (expr->op) - { - case HLSL_OP1_ABS: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_ABS); -@@ -5022,12 +5023,12 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP1_LOGIC_NOT: -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_NEG: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_NEG); -@@ -5080,7 +5081,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_ADD: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); -@@ -5112,7 +5113,7 @@ static void 
write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_DIV: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); -@@ -5128,7 +5129,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_DOT: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - switch (arg1->data_type->dimx) -@@ -5160,9 +5161,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); -@@ -5186,9 +5187,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); -@@ -5215,9 +5216,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); -@@ -5241,23 +5242,23 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - } - - case HLSL_OP2_LOGIC_AND: -- 
assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LOGIC_OR: -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LSHIFT: - assert(type_is_integer(dst_type)); -- assert(dst_type->base_type != HLSL_TYPE_BOOL); -+ assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_MAX: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); -@@ -5277,7 +5278,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_MIN: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); -@@ -5297,7 +5298,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_MOD: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_UINT: - write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); -@@ -5309,7 +5310,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_MUL: -- switch (dst_type->base_type) -+ switch (dst_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); -@@ -5331,9 +5332,9 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ 
assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - -- switch (src_type->base_type) -+ switch (src_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); -@@ -5355,8 +5356,8 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - - case HLSL_OP2_RSHIFT: - assert(type_is_integer(dst_type)); -- assert(dst_type->base_type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, -+ assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -+ write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, - &expr->node, arg1, arg2); - break; - -@@ -5458,7 +5459,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo - instr.dst_count = 1; - - assert(hlsl_is_numeric_type(type)); -- if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) -+ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) - { - struct hlsl_constant_value value; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 07b5818cba9..bf9d3038f08 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -223,6 +223,7 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW = 9016, - VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, - VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION = 9018, -+ VKD3D_SHADER_ERROR_VSIR_INVALID_GS = 9019, - - VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, - }; -@@ -528,8 +529,20 @@ enum vkd3d_shader_opcode - VKD3DSIH_UTOF, - VKD3DSIH_UTOU, - VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL, -+ VKD3DSIH_WAVE_ACTIVE_BALLOT, -+ VKD3DSIH_WAVE_ACTIVE_BIT_AND, -+ VKD3DSIH_WAVE_ACTIVE_BIT_OR, -+ VKD3DSIH_WAVE_ACTIVE_BIT_XOR, - 
VKD3DSIH_WAVE_ALL_TRUE, - VKD3DSIH_WAVE_ANY_TRUE, -+ VKD3DSIH_WAVE_OP_ADD, -+ VKD3DSIH_WAVE_OP_IMAX, -+ VKD3DSIH_WAVE_OP_IMIN, -+ VKD3DSIH_WAVE_OP_MAX, -+ VKD3DSIH_WAVE_OP_MIN, -+ VKD3DSIH_WAVE_OP_MUL, -+ VKD3DSIH_WAVE_OP_UMAX, -+ VKD3DSIH_WAVE_OP_UMIN, - VKD3DSIH_XOR, - - VKD3DSIH_INVALID, -@@ -793,6 +806,7 @@ enum vkd3d_tessellator_domain - #define VKD3DSI_SAMPLE_INFO_UINT 0x1 - #define VKD3DSI_SAMPLER_COMPARISON_MODE 0x1 - #define VKD3DSI_SHIFT_UNMASKED 0x1 -+#define VKD3DSI_WAVE_PREFIX 0x1 - - #define VKD3DSI_PRECISE_X 0x100 - #define VKD3DSI_PRECISE_Y 0x200 -@@ -1151,6 +1165,8 @@ enum vkd3d_primitive_type - VKD3D_PT_TRIANGLELIST_ADJ = 12, - VKD3D_PT_TRIANGLESTRIP_ADJ = 13, - VKD3D_PT_PATCH = 14, -+ -+ VKD3D_PT_COUNT = 15, - }; - - struct vkd3d_shader_primitive_type -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 3f3332dd3e3..36d8433939a 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -272,13 +272,15 @@ static bool has_extension(const VkExtensionProperties *extensions, - - for (i = 0; i < count; ++i) - { -- if (is_extension_disabled(extension_name)) -- { -- WARN("Extension %s is disabled.\n", debugstr_a(extension_name)); -- continue; -- } - if (!strcmp(extensions[i].extensionName, extension_name)) -+ { -+ if (is_extension_disabled(extension_name)) -+ { -+ WARN("Extension %s is disabled.\n", debugstr_a(extension_name)); -+ return false; -+ } - return true; -+ } - } - return false; - } -@@ -422,8 +424,6 @@ static HRESULT vkd3d_init_instance_caps(struct vkd3d_instance *instance, - ERR("Failed to enumerate instance extensions, vr %d.\n", vr); - return hresult_from_vk_result(vr); - } -- if (!count) -- return S_OK; - - if (!(vk_extensions = vkd3d_calloc(count, sizeof(*vk_extensions)))) - return E_OUTOFMEMORY; -@@ -869,29 +869,41 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - - 
conditional_rendering_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; -- vk_prepend_struct(&info->features2, conditional_rendering_features); -+ if (vulkan_info->EXT_conditional_rendering) -+ vk_prepend_struct(&info->features2, conditional_rendering_features); - depth_clip_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; -- vk_prepend_struct(&info->features2, depth_clip_features); -+ if (vulkan_info->EXT_depth_clip_enable) -+ vk_prepend_struct(&info->features2, depth_clip_features); - descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; -- vk_prepend_struct(&info->features2, descriptor_indexing_features); -+ if (vulkan_info->EXT_descriptor_indexing) -+ vk_prepend_struct(&info->features2, descriptor_indexing_features); - fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; -- vk_prepend_struct(&info->features2, fragment_shader_interlock_features); -+ if (vulkan_info->EXT_fragment_shader_interlock) -+ vk_prepend_struct(&info->features2, fragment_shader_interlock_features); - robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; -- vk_prepend_struct(&info->features2, robustness2_features); -+ if (vulkan_info->EXT_robustness2) -+ vk_prepend_struct(&info->features2, robustness2_features); - demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; -- vk_prepend_struct(&info->features2, demote_features); -+ if (vulkan_info->EXT_shader_demote_to_helper_invocation) -+ vk_prepend_struct(&info->features2, demote_features); - buffer_alignment_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; -- vk_prepend_struct(&info->features2, buffer_alignment_features); -+ if (vulkan_info->EXT_texel_buffer_alignment) -+ vk_prepend_struct(&info->features2, 
buffer_alignment_features); - xfb_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; -- vk_prepend_struct(&info->features2, xfb_features); -+ if (vulkan_info->EXT_transform_feedback) -+ vk_prepend_struct(&info->features2, xfb_features); - vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; -- vk_prepend_struct(&info->features2, vertex_divisor_features); -+ if (vulkan_info->EXT_vertex_attribute_divisor) -+ vk_prepend_struct(&info->features2, vertex_divisor_features); - timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; -- vk_prepend_struct(&info->features2, timeline_semaphore_features); -+ if (vulkan_info->KHR_timeline_semaphore) -+ vk_prepend_struct(&info->features2, timeline_semaphore_features); - mutable_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; -- vk_prepend_struct(&info->features2, mutable_features); -+ if (vulkan_info->EXT_mutable_descriptor_type) -+ vk_prepend_struct(&info->features2, mutable_features); - formats4444_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; -- vk_prepend_struct(&info->features2, formats4444_features); -+ if (vulkan_info->EXT_4444_formats) -+ vk_prepend_struct(&info->features2, formats4444_features); - - if (vulkan_info->KHR_get_physical_device_properties2) - VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); -@@ -901,15 +913,20 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i - info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - - maintenance3_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; -- vk_prepend_struct(&info->properties2, maintenance3_properties); -+ if (vulkan_info->KHR_maintenance3) -+ vk_prepend_struct(&info->properties2, maintenance3_properties); - descriptor_indexing_properties->sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; -- vk_prepend_struct(&info->properties2, descriptor_indexing_properties); -+ if (vulkan_info->EXT_descriptor_indexing) -+ vk_prepend_struct(&info->properties2, descriptor_indexing_properties); - buffer_alignment_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; -- vk_prepend_struct(&info->properties2, buffer_alignment_properties); -+ if (vulkan_info->EXT_texel_buffer_alignment) -+ vk_prepend_struct(&info->properties2, buffer_alignment_properties); - xfb_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; -- vk_prepend_struct(&info->properties2, xfb_properties); -+ if (vulkan_info->EXT_transform_feedback) -+ vk_prepend_struct(&info->properties2, xfb_properties); - vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; -- vk_prepend_struct(&info->properties2, vertex_divisor_properties); -+ if (vulkan_info->EXT_vertex_attribute_divisor) -+ vk_prepend_struct(&info->properties2, vertex_divisor_properties); - subgroup_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; - if (d3d12_device_environment_is_vulkan_min_1_1(device)) - vk_prepend_struct(&info->properties2, subgroup_properties); -@@ -1515,6 +1532,61 @@ static bool d3d12_device_supports_typed_uav_load_additional_formats(const struct - return true; - } - -+static HRESULT vkd3d_check_device_extensions(struct d3d12_device *device, -+ const struct vkd3d_device_create_info *create_info, VkExtensionProperties **vk_extensions, -+ uint32_t *vk_extension_count, uint32_t *device_extension_count, bool **user_extension_supported) -+{ -+ const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; -+ const struct vkd3d_optional_device_extensions_info *optional_extensions; -+ VkPhysicalDevice physical_device = device->vk_physical_device; -+ struct vkd3d_vulkan_info *vulkan_info = 
&device->vk_info; -+ VkResult vr; -+ -+ *device_extension_count = 0; -+ -+ if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, vk_extension_count, NULL))) < 0) -+ { -+ ERR("Failed to enumerate device extensions, vr %d.\n", vr); -+ return hresult_from_vk_result(vr); -+ } -+ -+ if (!(*vk_extensions = vkd3d_calloc(*vk_extension_count, sizeof(**vk_extensions)))) -+ return E_OUTOFMEMORY; -+ -+ TRACE("Enumerating %u device extensions.\n", *vk_extension_count); -+ if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, vk_extension_count, *vk_extensions))) < 0) -+ { -+ ERR("Failed to enumerate device extensions, vr %d.\n", vr); -+ vkd3d_free(*vk_extensions); -+ return hresult_from_vk_result(vr); -+ } -+ -+ optional_extensions = vkd3d_find_struct(create_info->next, OPTIONAL_DEVICE_EXTENSIONS_INFO); -+ if (optional_extensions && optional_extensions->extension_count) -+ { -+ if (!(*user_extension_supported = vkd3d_calloc(optional_extensions->extension_count, sizeof(bool)))) -+ { -+ vkd3d_free(*vk_extensions); -+ return E_OUTOFMEMORY; -+ } -+ } -+ else -+ { -+ *user_extension_supported = NULL; -+ } -+ -+ *device_extension_count = vkd3d_check_extensions(*vk_extensions, *vk_extension_count, -+ required_device_extensions, ARRAY_SIZE(required_device_extensions), -+ optional_device_extensions, ARRAY_SIZE(optional_device_extensions), -+ create_info->device_extensions, create_info->device_extension_count, -+ optional_extensions ? optional_extensions->extensions : NULL, -+ optional_extensions ? 
optional_extensions->extension_count : 0, -+ *user_extension_supported, vulkan_info, "device", -+ device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); -+ -+ return S_OK; -+} -+ - static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - const struct vkd3d_device_create_info *create_info, - struct vkd3d_physical_device_info *physical_device_info, -@@ -1523,14 +1595,13 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - const VkPhysicalDeviceSubgroupProperties *subgroup_properties = &physical_device_info->subgroup_properties; - const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; - VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock; -- const struct vkd3d_optional_device_extensions_info *optional_extensions; - VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; - VkPhysicalDevice physical_device = device->vk_physical_device; - struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; - VkExtensionProperties *vk_extensions; - VkPhysicalDeviceFeatures *features; -- uint32_t count; -- VkResult vr; -+ uint32_t vk_extension_count; -+ HRESULT hr; - - /* SHUFFLE is required to implement WaveReadLaneAt with dynamically uniform index before SPIR-V 1.5 / Vulkan 1.2. 
*/ - static const VkSubgroupFeatureFlags required_subgroup_features = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT -@@ -1542,7 +1613,11 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - - static const VkSubgroupFeatureFlags required_stages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - -- *device_extension_count = 0; -+ if (FAILED(hr = vkd3d_check_device_extensions(device, create_info, &vk_extensions, &vk_extension_count, -+ device_extension_count, user_extension_supported))) -+ return hr; -+ -+ vkd3d_physical_device_info_init(physical_device_info, device); - - vkd3d_trace_physical_device(physical_device, physical_device_info, vk_procs); - vkd3d_trace_physical_device_features(physical_device_info); -@@ -1634,48 +1709,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - device->feature_options5.RenderPassesTier = D3D12_RENDER_PASS_TIER_0; - device->feature_options5.RaytracingTier = D3D12_RAYTRACING_TIER_NOT_SUPPORTED; - -- if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, NULL))) < 0) -- { -- ERR("Failed to enumerate device extensions, vr %d.\n", vr); -- return hresult_from_vk_result(vr); -- } -- if (!count) -- return S_OK; -- -- if (!(vk_extensions = vkd3d_calloc(count, sizeof(*vk_extensions)))) -- return E_OUTOFMEMORY; -- -- TRACE("Enumerating %u device extensions.\n", count); -- if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, vk_extensions))) < 0) -- { -- ERR("Failed to enumerate device extensions, vr %d.\n", vr); -- vkd3d_free(vk_extensions); -- return hresult_from_vk_result(vr); -- } -- -- optional_extensions = vkd3d_find_struct(create_info->next, OPTIONAL_DEVICE_EXTENSIONS_INFO); -- if (optional_extensions && optional_extensions->extension_count) -- { -- if (!(*user_extension_supported = vkd3d_calloc(optional_extensions->extension_count, sizeof(bool)))) -- { -- vkd3d_free(vk_extensions); -- return E_OUTOFMEMORY; -- } -- } -- else -- { -- 
*user_extension_supported = NULL; -- } -- -- *device_extension_count = vkd3d_check_extensions(vk_extensions, count, -- required_device_extensions, ARRAY_SIZE(required_device_extensions), -- optional_device_extensions, ARRAY_SIZE(optional_device_extensions), -- create_info->device_extensions, create_info->device_extension_count, -- optional_extensions ? optional_extensions->extensions : NULL, -- optional_extensions ? optional_extensions->extension_count : 0, -- *user_extension_supported, vulkan_info, "device", -- device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); -- - fragment_shader_interlock = &physical_device_info->fragment_shader_interlock_features; - if (!fragment_shader_interlock->fragmentShaderSampleInterlock - || !fragment_shader_interlock->fragmentShaderPixelInterlock) -@@ -1701,7 +1734,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - - vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties; - -- if (get_spec_version(vk_extensions, count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3) -+ if (get_spec_version(vk_extensions, vk_extension_count, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) >= 3) - { - const VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *divisor_features; - divisor_features = &physical_device_info->vertex_divisor_features; -@@ -2098,8 +2131,6 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, - - VK_CALL(vkGetPhysicalDeviceMemoryProperties(physical_device, &device->memory_properties)); - -- vkd3d_physical_device_info_init(&physical_device_info, device); -- - if (FAILED(hr = vkd3d_init_device_caps(device, create_info, &physical_device_info, - &extension_count, &user_extension_supported))) - return hr; -diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c -index 58747342b5c..11029c9f5f9 100644 ---- a/libs/vkd3d/libs/vkd3d/utils.c -+++ b/libs/vkd3d/libs/vkd3d/utils.c -@@ -901,6 +901,30 @@ bool 
vkd3d_get_program_name(char program_name[PATH_MAX]) - return true; - } - -+#elif defined(WIN32) -+ -+bool vkd3d_get_program_name(char program_name[PATH_MAX]) -+{ -+ char buffer[MAX_PATH]; -+ char *p, *name; -+ size_t len; -+ -+ *program_name = '\0'; -+ len = GetModuleFileNameA(NULL, buffer, ARRAY_SIZE(buffer)); -+ if (!(len && len < MAX_PATH)) -+ return false; -+ -+ name = buffer; -+ if ((p = strrchr(name, '/'))) -+ name = p + 1; -+ if ((p = strrchr(name, '\\'))) -+ name = p + 1; -+ -+ len = strlen(name) + 1; -+ memcpy(program_name, name, len); -+ return true; -+} -+ - #else - - bool vkd3d_get_program_name(char program_name[PATH_MAX]) --- -2.43.0 - diff --git a/patches/vkd3d-latest/0007-Updated-vkd3d-to-9e57039fce4040c8bfadaa73bf449c00591.patch b/patches/vkd3d-latest/0007-Updated-vkd3d-to-9e57039fce4040c8bfadaa73bf449c00591.patch deleted file mode 100644 index a2249699..00000000 --- a/patches/vkd3d-latest/0007-Updated-vkd3d-to-9e57039fce4040c8bfadaa73bf449c00591.patch +++ /dev/null @@ -1,937 +0,0 @@ -From 9cd79b111e612defc44743beab2de36703bb1786 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 15 May 2024 08:23:37 +1000 -Subject: [PATCH] Updated vkd3d to 9e57039fce4040c8bfadaa73bf449c005912a83e. 
- ---- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 5 + - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 8 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 55 +++++ - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 7 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 3 + - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 193 +++++++++++++----- - libs/vkd3d/libs/vkd3d-shader/ir.c | 9 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 149 +++++++++++++- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 2 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 5 + - libs/vkd3d/libs/vkd3d/device.c | 97 ++++++++- - 11 files changed, 463 insertions(+), 70 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index f2ad39f2f07..9abc2c4db70 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -332,8 +332,10 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_WAVE_ACTIVE_BIT_AND ] = "wave_active_bit_and", - [VKD3DSIH_WAVE_ACTIVE_BIT_OR ] = "wave_active_bit_or", - [VKD3DSIH_WAVE_ACTIVE_BIT_XOR ] = "wave_active_bit_xor", -+ [VKD3DSIH_WAVE_ALL_BIT_COUNT ] = "wave_all_bit_count", - [VKD3DSIH_WAVE_ALL_TRUE ] = "wave_all_true", - [VKD3DSIH_WAVE_ANY_TRUE ] = "wave_any_true", -+ [VKD3DSIH_WAVE_IS_FIRST_LANE ] = "wave_is_first_lane", - [VKD3DSIH_WAVE_OP_ADD ] = "wave_op_add", - [VKD3DSIH_WAVE_OP_IMAX ] = "wave_op_imax", - [VKD3DSIH_WAVE_OP_IMIN ] = "wave_op_imin", -@@ -342,6 +344,9 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_WAVE_OP_MUL ] = "wave_op_mul", - [VKD3DSIH_WAVE_OP_UMAX ] = "wave_op_umax", - [VKD3DSIH_WAVE_OP_UMIN ] = "wave_op_umin", -+ [VKD3DSIH_WAVE_PREFIX_BIT_COUNT ] = "wave_prefix_bit_count", -+ [VKD3DSIH_WAVE_READ_LANE_AT ] = "wave_read_lane_at", -+ [VKD3DSIH_WAVE_READ_LANE_FIRST ] = "wave_read_lane_first", - [VKD3DSIH_XOR ] = "xor", - }; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 24a95224349..7c7c71e3c9a 100644 ---- 
a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1713,7 +1713,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { -- if (var->semantic.name || !var->regs[r].allocated) -+ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - ++uniform_count; -@@ -1751,14 +1751,14 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - { -- if (var->semantic.name || !var->regs[r].allocated) -+ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - put_u32(buffer, 0); /* name */ - if (r == HLSL_REGSET_NUMERIC) - { - put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); -- put_u32(buffer, var->data_type->reg_size[r] / 4); -+ put_u32(buffer, var->bind_count[r]); - } - else - { -@@ -1780,7 +1780,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - { - size_t var_offset, name_offset; - -- if (var->semantic.name || !var->regs[r].allocated) -+ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) - continue; - - var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 29f736364dc..605e97530c1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -446,17 +446,22 @@ enum dx_intrinsic_opcode - DX_STORE_PATCH_CONSTANT = 106, - DX_OUTPUT_CONTROL_POINT_ID = 107, - DX_PRIMITIVE_ID = 108, -+ DX_WAVE_IS_FIRST_LANE = 110, - DX_WAVE_GET_LANE_INDEX = 111, - DX_WAVE_GET_LANE_COUNT = 112, - DX_WAVE_ANY_TRUE = 113, - DX_WAVE_ALL_TRUE = 114, - DX_WAVE_ACTIVE_ALL_EQUAL = 115, - DX_WAVE_ACTIVE_BALLOT = 116, -+ DX_WAVE_READ_LANE_AT = 117, -+ DX_WAVE_READ_LANE_FIRST = 118, - DX_WAVE_ACTIVE_OP = 119, - DX_WAVE_ACTIVE_BIT = 120, - DX_WAVE_PREFIX_OP = 
121, - DX_LEGACY_F32TOF16 = 130, - DX_LEGACY_F16TOF32 = 131, -+ DX_WAVE_ALL_BIT_COUNT = 135, -+ DX_WAVE_PREFIX_BIT_COUNT = 136, - DX_RAW_BUFFER_LOAD = 139, - DX_RAW_BUFFER_STORE = 140, - }; -@@ -3816,6 +3821,8 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( - { - switch (sysval_semantic) - { -+ case VKD3D_SHADER_SV_COVERAGE: -+ return VKD3DSPR_COVERAGE; - case VKD3D_SHADER_SV_DEPTH: - return VKD3DSPR_DEPTHOUT; - case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: -@@ -4489,6 +4496,25 @@ static bool sm6_parser_emit_coordinate_construct(struct sm6_parser *sm6, const s - return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg); - } - -+static enum vkd3d_shader_opcode sm6_dx_map_void_op(enum dx_intrinsic_opcode op) -+{ -+ switch (op) -+ { -+ case DX_WAVE_IS_FIRST_LANE: -+ return VKD3DSIH_WAVE_IS_FIRST_LANE; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void sm6_parser_emit_dx_void(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ vsir_instruction_init(ins, &sm6->p.location, sm6_dx_map_void_op(op)); -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) - { - switch (op) -@@ -4555,10 +4581,16 @@ static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) - return VKD3DSIH_F16TOF32; - case DX_WAVE_ACTIVE_ALL_EQUAL: - return VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL; -+ case DX_WAVE_ALL_BIT_COUNT: -+ return VKD3DSIH_WAVE_ALL_BIT_COUNT; - case DX_WAVE_ALL_TRUE: - return VKD3DSIH_WAVE_ALL_TRUE; - case DX_WAVE_ANY_TRUE: - return VKD3DSIH_WAVE_ANY_TRUE; -+ case DX_WAVE_PREFIX_BIT_COUNT: -+ return VKD3DSIH_WAVE_PREFIX_BIT_COUNT; -+ case DX_WAVE_READ_LANE_FIRST: -+ return VKD3DSIH_WAVE_READ_LANE_FIRST; - default: - vkd3d_unreachable(); - } -@@ -4594,6 +4626,8 @@ static enum 
vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co - return VKD3DSIH_UMAX; - case DX_UMIN: - return VKD3DSIH_UMIN; -+ case DX_WAVE_READ_LANE_AT: -+ return VKD3DSIH_WAVE_READ_LANE_AT; - default: - vkd3d_unreachable(); - } -@@ -6233,11 +6267,16 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_WAVE_ACTIVE_BALLOT ] = {"V", "1", sm6_parser_emit_dx_wave_active_ballot}, - [DX_WAVE_ACTIVE_BIT ] = {"m", "Rc", sm6_parser_emit_dx_wave_active_bit}, - [DX_WAVE_ACTIVE_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, -+ [DX_WAVE_ALL_BIT_COUNT ] = {"i", "1", sm6_parser_emit_dx_unary}, - [DX_WAVE_ALL_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, - [DX_WAVE_ANY_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, - [DX_WAVE_GET_LANE_COUNT ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, - [DX_WAVE_GET_LANE_INDEX ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, -+ [DX_WAVE_IS_FIRST_LANE ] = {"1", "", sm6_parser_emit_dx_void}, -+ [DX_WAVE_PREFIX_BIT_COUNT ] = {"i", "1", sm6_parser_emit_dx_unary}, - [DX_WAVE_PREFIX_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, -+ [DX_WAVE_READ_LANE_AT ] = {"n", "Ri", sm6_parser_emit_dx_binary}, -+ [DX_WAVE_READ_LANE_FIRST ] = {"n", "R", sm6_parser_emit_dx_unary}, - }; - - static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_value *value, char info_type, -@@ -6697,6 +6736,15 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor - - code = record->operands[i++]; - -+ /* dxcompiler occasionally emits bool not-equal-to-false, which is a no-op. Bool comparisons -+ * do not otherwise occur, so deleting these avoids the need for backend support. 
*/ -+ if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) -+ { -+ ins->handler_idx = VKD3DSIH_NOP; -+ *dst = *a; -+ return; -+ } -+ - if ((!is_int && !is_fp) || is_int != (code >= ICMP_EQ)) - { - FIXME("Invalid operation %"PRIu64" on type class %u.\n", code, type_a->class); -@@ -8407,8 +8455,11 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = - [SEMANTIC_KIND_VERTEXID] = VKD3D_SHADER_SV_VERTEX_ID, - [SEMANTIC_KIND_INSTANCEID] = VKD3D_SHADER_SV_INSTANCE_ID, - [SEMANTIC_KIND_POSITION] = VKD3D_SHADER_SV_POSITION, -+ [SEMANTIC_KIND_CLIPDISTANCE] = VKD3D_SHADER_SV_CLIP_DISTANCE, -+ [SEMANTIC_KIND_CULLDISTANCE] = VKD3D_SHADER_SV_CULL_DISTANCE, - [SEMANTIC_KIND_PRIMITIVEID] = VKD3D_SHADER_SV_PRIMITIVE_ID, - [SEMANTIC_KIND_ISFRONTFACE] = VKD3D_SHADER_SV_IS_FRONT_FACE, -+ [SEMANTIC_KIND_COVERAGE] = VKD3D_SHADER_SV_COVERAGE, - [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, - [SEMANTIC_KIND_DEPTH] = VKD3D_SHADER_SV_DEPTH, - [SEMANTIC_KIND_DEPTHLESSEQUAL] = VKD3D_SHADER_SV_DEPTH_LESS_EQUAL, -@@ -9358,6 +9409,10 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - e->used_mask <<= index; - } - -+ /* DXIL reads/writes uint for bool I/O. */ -+ if (e->component_type == VKD3D_SHADER_COMPONENT_BOOL) -+ e->component_type = VKD3D_SHADER_COMPONENT_UINT; -+ - m = element_node->operands[4]; - if (!sm6_metadata_value_is_node(m)) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index a89e43f9bf2..08a017874ae 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -447,9 +447,10 @@ struct hlsl_ir_var - enum hlsl_sampler_dim sampler_dim; - struct vkd3d_shader_location first_sampler_dim_loc; - } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; -- /* Minimum number of binds required to include all object components actually used in the shader. -- * It may be less than the allocation size, e.g. for texture arrays. 
*/ -- unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1]; -+ /* Minimum number of binds required to include all components actually used in the shader. -+ * It may be less than the allocation size, e.g. for texture arrays. -+ * The bind_count for HLSL_REGSET_NUMERIC is only used in uniforms for now. */ -+ unsigned int bind_count[HLSL_REGSET_LAST + 1]; - - /* Whether the shader performs dereferences with non-constant offsets in the variable. */ - bool indexable; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 79317bb0545..c6b6219eb4b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -168,6 +168,9 @@ static struct list *make_empty_list(struct hlsl_ctx *ctx) - - static void destroy_block(struct hlsl_block *block) - { -+ if (!block) -+ return; -+ - hlsl_block_cleanup(block); - vkd3d_free(block); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 8882deaf6cd..26179042082 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -4248,34 +4248,67 @@ static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hls - return false; - } - --static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static void register_deref_usage(struct hlsl_ctx *ctx, struct hlsl_deref *deref) - { -- struct hlsl_ir_resource_load *load; -- struct hlsl_ir_var *var; -- enum hlsl_regset regset; -+ struct hlsl_ir_var *var = deref->var; -+ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); -+ uint32_t required_bind_count; -+ struct hlsl_type *type; - unsigned int index; - -- if (instr->type != HLSL_IR_RESOURCE_LOAD) -- return false; -- -- load = hlsl_ir_resource_load(instr); -- var = load->resource.var; -+ if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index)) -+ return; - -- regset = 
hlsl_deref_get_regset(ctx, &load->resource); -+ if (regset <= HLSL_REGSET_LAST_OBJECT) -+ { -+ var->objects_usage[regset][index].used = true; -+ var->bind_count[regset] = max(var->bind_count[regset], index + 1); -+ } -+ else if (regset == HLSL_REGSET_NUMERIC) -+ { -+ type = hlsl_deref_get_type(ctx, deref); - -- if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) -- return false; -+ hlsl_regset_index_from_deref(ctx, deref, regset, &index); -+ required_bind_count = align(index + type->reg_size[regset], 4) / 4; -+ var->bind_count[regset] = max(var->bind_count[regset], required_bind_count); -+ } -+ else -+ { -+ vkd3d_unreachable(); -+ } -+} - -- var->objects_usage[regset][index].used = true; -- var->bind_count[regset] = max(var->bind_count[regset], index + 1); -- if (load->sampler.var) -+static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ switch (instr->type) - { -- var = load->sampler.var; -- if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) -- return false; -+ case HLSL_IR_LOAD: -+ { -+ struct hlsl_ir_load *load = hlsl_ir_load(instr); -+ -+ if (!load->src.var->is_uniform) -+ return false; -+ -+ /* These will are handled by validate_static_object_references(). 
*/ -+ if (hlsl_deref_get_regset(ctx, &load->src) != HLSL_REGSET_NUMERIC) -+ return false; -+ -+ register_deref_usage(ctx, &load->src); -+ break; -+ } -+ -+ case HLSL_IR_RESOURCE_LOAD: -+ register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->resource); -+ if (hlsl_ir_resource_load(instr)->sampler.var) -+ register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->sampler); -+ break; - -- var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; -- var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); -+ case HLSL_IR_RESOURCE_STORE: -+ register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource); -+ break; -+ -+ default: -+ break; - } - - return false; -@@ -4520,16 +4553,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - } - } - -+static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort) -+{ -+ struct hlsl_ir_var *var; -+ -+ list_remove(&to_sort->extern_entry); -+ -+ LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) -+ { -+ uint32_t to_sort_size = to_sort->bind_count[HLSL_REGSET_NUMERIC]; -+ uint32_t var_size = var->bind_count[HLSL_REGSET_NUMERIC]; -+ -+ if (to_sort_size > var_size) -+ { -+ list_add_before(&var->extern_entry, &to_sort->extern_entry); -+ return; -+ } -+ } -+ -+ list_add_tail(sorted, &to_sort->extern_entry); -+} -+ -+static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) -+{ -+ struct list sorted = LIST_INIT(sorted); -+ struct hlsl_ir_var *var, *next; -+ -+ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_uniform) -+ sort_uniform_by_numeric_bind_count(&sorted, var); -+ } -+ list_move_tail(&ctx->extern_vars, &sorted); -+} -+ - static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { - struct register_allocator allocator = {0}; - struct hlsl_ir_var *var; - -+ sort_uniforms_by_numeric_bind_count(ctx); -+ - 
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; - -- if (!var->is_uniform || !var->last_read || reg_size == 0) -+ if (!var->is_uniform || reg_size == 0) - continue; - - if (var->reg_reservation.reg_type == 'c') -@@ -4560,15 +4629,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; - -- if (!var->is_uniform || !var->last_read || reg_size == 0) -+ if (!var->is_uniform || alloc_size == 0) - continue; - - if (!var->regs[HLSL_REGSET_NUMERIC].allocated) - { -- var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, -- 1, UINT_MAX, var->data_type); -+ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size); - TRACE("Allocated %s to %s.\n", var->name, - debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); - } -@@ -5102,14 +5170,15 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - return true; - } - -+/* Retrieves true if the index is constant, and false otherwise. In the latter case, the maximum -+ * possible index is retrieved, assuming there is not out-of-bounds access. 
*/ - bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, - enum hlsl_regset regset, unsigned int *index) - { - struct hlsl_type *type = deref->var->data_type; -+ bool index_is_constant = true; - unsigned int i; - -- assert(regset <= HLSL_REGSET_LAST_OBJECT); -- - *index = 0; - - for (i = 0; i < deref->path_len; ++i) -@@ -5118,37 +5187,62 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref - unsigned int idx = 0; - - assert(path_node); -- if (path_node->type != HLSL_IR_CONSTANT) -- return false; -+ if (path_node->type == HLSL_IR_CONSTANT) -+ { -+ /* We should always have generated a cast to UINT. */ -+ assert(path_node->data_type->class == HLSL_CLASS_SCALAR -+ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - -- /* We should always have generated a cast to UINT. */ -- assert(path_node->data_type->class == HLSL_CLASS_SCALAR -- && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ idx = hlsl_ir_constant(path_node)->value.u[0].u; - -- idx = hlsl_ir_constant(path_node)->value.u[0].u; -+ switch (type->class) -+ { -+ case HLSL_CLASS_ARRAY: -+ if (idx >= type->e.array.elements_count) -+ return false; - -- switch (type->class) -+ *index += idx * type->e.array.type->reg_size[regset]; -+ break; -+ -+ case HLSL_CLASS_STRUCT: -+ *index += type->e.record.fields[idx].reg_offset[regset]; -+ break; -+ -+ case HLSL_CLASS_MATRIX: -+ *index += 4 * idx; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ } -+ else - { -- case HLSL_CLASS_ARRAY: -- if (idx >= type->e.array.elements_count) -- return false; -+ index_is_constant = false; - -- *index += idx * type->e.array.type->reg_size[regset]; -- break; -+ switch (type->class) -+ { -+ case HLSL_CLASS_ARRAY: -+ idx = type->e.array.elements_count - 1; -+ *index += idx * type->e.array.type->reg_size[regset]; -+ break; - -- case HLSL_CLASS_STRUCT: -- *index += type->e.record.fields[idx].reg_offset[regset]; -- break; -+ case HLSL_CLASS_MATRIX: -+ idx = 
hlsl_type_major_size(type) - 1; -+ *index += idx * 4; -+ break; - -- default: -- vkd3d_unreachable(); -+ default: -+ vkd3d_unreachable(); -+ } - } - - type = hlsl_get_element_type_from_path_index(ctx, type, path_node); - } - -- assert(type->reg_size[regset] == 1); -- return true; -+ assert(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); -+ assert(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); -+ return index_is_constant; - } - - bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) -@@ -5440,7 +5534,12 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); - if (profile->major_version >= 4) - hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); -- hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); -+ -+ do -+ compute_liveness(ctx, entry_func); -+ while (hlsl_transform_ir(ctx, dce, body, NULL)); -+ -+ hlsl_transform_ir(ctx, track_components_usage, body, NULL); - sort_synthetic_separated_samplers_first(ctx); - - if (profile->major_version < 4) -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index c6ecbdd9e46..2c78447e382 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -5748,11 +5748,6 @@ static void vsir_validate_instruction(struct validation_context *ctx) - instruction->declaration.max_tessellation_factor); - return; - -- /* The DXIL parser can generate these outside phases, but this is not an issue. 
*/ -- case VKD3DSIH_DCL_INPUT: -- case VKD3DSIH_DCL_OUTPUT: -- return; -- - case VKD3DSIH_DCL_INPUT_PRIMITIVE: - if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED - || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) -@@ -5810,7 +5805,9 @@ static void vsir_validate_instruction(struct validation_context *ctx) - break; - } - -- if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID) -+ /* Only DCL instructions may occur outside hull shader phases. */ -+ if (!vsir_instruction_is_dcl(instruction) && version->type == VKD3D_SHADER_TYPE_HULL -+ && ctx->phase == VKD3DSIH_INVALID) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, - "Instruction %#x appear before any phase instruction in a hull shader.", - instruction->handler_idx); -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 813e20fdcd7..4ee8e6bba4c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -1760,6 +1760,45 @@ static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_ - result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); - } - -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot_bit_count(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, SpvGroupOperation group_op, uint32_t val_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformBallotBitCount, -+ result_type, vkd3d_spirv_get_op_scope_subgroup(builder), group_op, val_id); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_elect(struct vkd3d_spirv_builder *builder) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniform); -+ return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpGroupNonUniformElect, -+ vkd3d_spirv_get_op_type_bool(builder), 
vkd3d_spirv_get_op_scope_subgroup(builder)); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_broadcast(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id, uint32_t lane_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformBroadcast, result_type, -+ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, lane_id); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_shuffle(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id, uint32_t lane_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformShuffle); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformShuffle, result_type, -+ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, lane_id); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_broadcast_first(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); -+ return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpGroupNonUniformBroadcastFirst, -+ result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); -+} -+ - static uint32_t vkd3d_spirv_build_op_glsl_std450_tr1(struct vkd3d_spirv_builder *builder, - enum GLSLstd450 op, uint32_t result_type, uint32_t operand) - { -@@ -3602,8 +3641,9 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp - indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, ®->idx[0]); - } - -+ /* Alignment is supported only in the Kernel execution model and is an optimisation only. 
*/ - if (reg->alignment) -- WARN("Ignoring alignment %u.\n", reg->alignment); -+ TRACE("Ignoring alignment %u.\n", reg->alignment); - - if (index_count) - { -@@ -5827,7 +5867,7 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler - - if (flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS)) - FIXME("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); -- else -+ else if (flags) - WARN("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); - } - -@@ -5889,8 +5929,9 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil - vsir_register_init(®, VKD3DSPR_IDXTEMP, VKD3D_DATA_FLOAT, 1); - reg.idx[0].offset = temp->register_idx; - -+ /* Alignment is supported only in the Kernel execution model and is an optimisation only. */ - if (temp->alignment) -- WARN("Ignoring alignment %u.\n", temp->alignment); -+ TRACE("Ignoring alignment %u.\n", temp->alignment); - - function_location = spirv_compiler_get_current_function_location(compiler); - vkd3d_spirv_begin_function_stream_insertion(builder, function_location); -@@ -9801,18 +9842,26 @@ static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } - --static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static uint32_t spirv_compiler_emit_group_nonuniform_ballot(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_src_param *src) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_dst_param *dst = instruction->dst; -- const struct vkd3d_shader_src_param *src = instruction->src; - uint32_t type_id, val_id; - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, VKD3D_VEC4_SIZE); - val_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); - val_id = 
vkd3d_spirv_build_op_group_nonuniform_ballot(builder, type_id, val_id); - -+ return val_id; -+} -+ -+static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ uint32_t val_id; -+ -+ val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } - -@@ -9871,6 +9920,79 @@ static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } - -+static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ SpvGroupOperation group_op; -+ uint32_t type_id, val_id; -+ -+ group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? 
SpvGroupOperationExclusiveScan -+ : SpvGroupOperationReduce; -+ -+ val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); -+ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_ballot_bit_count(builder, type_id, group_op, val_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_wave_is_first_lane(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ uint32_t val_id; -+ -+ val_id = vkd3d_spirv_build_op_group_nonuniform_elect(builder); -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_wave_read_lane_at(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, lane_id, val_id; -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); -+ lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); -+ -+ /* TODO: detect values loaded from a const buffer? */ -+ if (register_is_constant_or_undef(&src[1].reg)) -+ { -+ /* Uniform lane_id only. */ -+ val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast(builder, type_id, val_id, lane_id); -+ } -+ else -+ { -+ /* WaveReadLaneAt supports non-uniform lane ids, so if lane_id is not constant it may not be uniform. 
*/ -+ val_id = vkd3d_spirv_build_op_group_nonuniform_shuffle(builder, type_id, val_id, lane_id); -+ } -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_wave_read_lane_first(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, val_id; -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast_first(builder, type_id, val_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ - /* This function is called after declarations are processed. */ - static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) - { -@@ -10236,6 +10358,19 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_WAVE_OP_UMIN: - spirv_compiler_emit_wave_alu_op(compiler, instruction); - break; -+ case VKD3DSIH_WAVE_ALL_BIT_COUNT: -+ case VKD3DSIH_WAVE_PREFIX_BIT_COUNT: -+ spirv_compiler_emit_wave_bit_count(compiler, instruction); -+ break; -+ case VKD3DSIH_WAVE_IS_FIRST_LANE: -+ spirv_compiler_emit_wave_is_first_lane(compiler, instruction); -+ break; -+ case VKD3DSIH_WAVE_READ_LANE_AT: -+ spirv_compiler_emit_wave_read_lane_at(compiler, instruction); -+ break; -+ case VKD3DSIH_WAVE_READ_LANE_FIRST: -+ spirv_compiler_emit_wave_read_lane_first(compiler, instruction); -+ break; - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: - case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 
29b2c1482a9..c15dae52c50 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -2004,7 +2004,7 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, - { - void *params; - -- if (count > allocator->count - allocator->index) -+ if (!allocator->current || count > allocator->count - allocator->index) - { - struct vkd3d_shader_param_node *next; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index bf9d3038f08..742189cefbb 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -533,8 +533,10 @@ enum vkd3d_shader_opcode - VKD3DSIH_WAVE_ACTIVE_BIT_AND, - VKD3DSIH_WAVE_ACTIVE_BIT_OR, - VKD3DSIH_WAVE_ACTIVE_BIT_XOR, -+ VKD3DSIH_WAVE_ALL_BIT_COUNT, - VKD3DSIH_WAVE_ALL_TRUE, - VKD3DSIH_WAVE_ANY_TRUE, -+ VKD3DSIH_WAVE_IS_FIRST_LANE, - VKD3DSIH_WAVE_OP_ADD, - VKD3DSIH_WAVE_OP_IMAX, - VKD3DSIH_WAVE_OP_IMIN, -@@ -543,6 +545,9 @@ enum vkd3d_shader_opcode - VKD3DSIH_WAVE_OP_MUL, - VKD3DSIH_WAVE_OP_UMAX, - VKD3DSIH_WAVE_OP_UMIN, -+ VKD3DSIH_WAVE_PREFIX_BIT_COUNT, -+ VKD3DSIH_WAVE_READ_LANE_AT, -+ VKD3DSIH_WAVE_READ_LANE_FIRST, - VKD3DSIH_XOR, - - VKD3DSIH_INVALID, -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 36d8433939a..8b817264e63 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -3487,12 +3487,20 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 - return E_INVALIDARG; - } - -+ if (data->HighestShaderModel != D3D_SHADER_MODEL_5_1 -+ && (data->HighestShaderModel < D3D_SHADER_MODEL_6_0 -+ || data->HighestShaderModel > D3D_HIGHEST_SHADER_MODEL)) -+ { -+ WARN("Unknown shader model %#x.\n", data->HighestShaderModel); -+ return E_INVALIDARG; -+ } -+ - TRACE("Request shader model %#x.\n", data->HighestShaderModel); - - #ifdef 
VKD3D_SHADER_UNSUPPORTED_DXIL -- data->HighestShaderModel = D3D_SHADER_MODEL_6_0; -+ data->HighestShaderModel = min(data->HighestShaderModel, D3D_SHADER_MODEL_6_0); - #else -- data->HighestShaderModel = D3D_SHADER_MODEL_5_1; -+ data->HighestShaderModel = min(data->HighestShaderModel, D3D_SHADER_MODEL_5_1); - #endif - - TRACE("Shader model %#x.\n", data->HighestShaderModel); -@@ -3911,6 +3919,91 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 - return S_OK; - } - -+ case D3D12_FEATURE_D3D12_OPTIONS14: -+ { -+ D3D12_FEATURE_DATA_D3D12_OPTIONS14 *data = feature_data; -+ -+ if (feature_data_size != sizeof(*data)) -+ { -+ WARN("Invalid size %u.\n", feature_data_size); -+ } -+ -+ data->AdvancedTextureOpsSupported = FALSE; -+ data->WriteableMSAATexturesSupported = FALSE; -+ data->IndependentFrontAndBackStencilRefMaskSupported = FALSE; -+ -+ TRACE("Advanced texture ops %#x.\n", data->AdvancedTextureOpsSupported); -+ TRACE("Writeable MSAA textures %#x.\n", data->WriteableMSAATexturesSupported); -+ TRACE("Independent front and back stencil ref mask %#x.\n", data->IndependentFrontAndBackStencilRefMaskSupported); -+ return S_OK; -+ } -+ -+ case D3D12_FEATURE_D3D12_OPTIONS15: -+ { -+ D3D12_FEATURE_DATA_D3D12_OPTIONS15 *data = feature_data; -+ -+ if (feature_data_size != sizeof(*data)) -+ { -+ WARN("Invalid size %u.\n", feature_data_size); -+ } -+ -+ data->TriangleFanSupported = FALSE; -+ data->DynamicIndexBufferStripCutSupported = FALSE; -+ -+ TRACE("Triangle fan %#x.\n", data->TriangleFanSupported); -+ TRACE("Dynamic index buffer strip cut %#x.\n", data->DynamicIndexBufferStripCutSupported); -+ return S_OK; -+ } -+ -+ case D3D12_FEATURE_D3D12_OPTIONS16: -+ { -+ D3D12_FEATURE_DATA_D3D12_OPTIONS16 *data = feature_data; -+ -+ if (feature_data_size != sizeof(*data)) -+ { -+ WARN("Invalid size %u.\n", feature_data_size); -+ } -+ -+ data->DynamicDepthBiasSupported = FALSE; -+ data->GPUUploadHeapSupported = FALSE; -+ -+ TRACE("Dynamic depth bias 
%#x.\n", data->DynamicDepthBiasSupported); -+ TRACE("GPU upload heap %#x.\n", data->GPUUploadHeapSupported); -+ return S_OK; -+ } -+ -+ case D3D12_FEATURE_D3D12_OPTIONS17: -+ { -+ D3D12_FEATURE_DATA_D3D12_OPTIONS17 *data = feature_data; -+ -+ if (feature_data_size != sizeof(*data)) -+ { -+ WARN("Invalid size %u.\n", feature_data_size); -+ } -+ -+ data->NonNormalizedCoordinateSamplersSupported = FALSE; -+ data->ManualWriteTrackingResourceSupported = FALSE; -+ -+ TRACE("Non-normalized coordinate samplers %#x.\n", data->NonNormalizedCoordinateSamplersSupported); -+ TRACE("Manual write tracking resource %#x.\n", data->ManualWriteTrackingResourceSupported); -+ return S_OK; -+ } -+ -+ case D3D12_FEATURE_D3D12_OPTIONS18: -+ { -+ D3D12_FEATURE_DATA_D3D12_OPTIONS18 *data = feature_data; -+ -+ if (feature_data_size != sizeof(*data)) -+ { -+ WARN("Invalid size %u.\n", feature_data_size); -+ } -+ -+ data->RenderPassesValid = FALSE; -+ -+ TRACE("Render passes valid %#x.\n", data->RenderPassesValid); -+ return S_OK; -+ } -+ - default: - FIXME("Unhandled feature %#x.\n", feature); - return E_NOTIMPL; --- -2.43.0 - diff --git a/patches/vkd3d-latest/0008-Updated-vkd3d-to-061dc390367b4c83022d5fe1255f8d38f6b.patch b/patches/vkd3d-latest/0008-Updated-vkd3d-to-061dc390367b4c83022d5fe1255f8d38f6b.patch deleted file mode 100644 index 73845b28..00000000 --- a/patches/vkd3d-latest/0008-Updated-vkd3d-to-061dc390367b4c83022d5fe1255f8d38f6b.patch +++ /dev/null @@ -1,153 +0,0 @@ -From cda6dd1902e0113ad3730c1f696138b668bbfacb Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 16 May 2024 11:56:37 +1000 -Subject: [PATCH] Updated vkd3d to 061dc390367b4c83022d5fe1255f8d38f6b7ce9c. 
- ---- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 13 ++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 69 ++++++++++++--------- - 3 files changed, 47 insertions(+), 36 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 08a017874ae..27814f3a56f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -1250,6 +1250,7 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const - - void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); - -+void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); - int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index c6b6219eb4b..9c1bdef926d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -1293,7 +1293,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - struct hlsl_ir_node *node; - struct hlsl_block expr; - unsigned int ret = 0; -- bool progress; -+ struct hlsl_src src; - - LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) - { -@@ -1330,13 +1330,12 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - return 0; - } - -- do -- { -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); -- progress |= hlsl_copy_propagation_execute(ctx, &expr); -- } while (progress); -+ /* Wrap the node into a src to allow the reference to survive the multiple const passes. 
*/ -+ hlsl_src_from_node(&src, node_from_block(&expr)); -+ hlsl_run_const_passes(ctx, &expr); -+ node = src.node; -+ hlsl_src_remove(&src); - -- node = node_from_block(&expr); - if (node->type == HLSL_IR_CONSTANT) - { - constant = hlsl_ir_constant(node); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 26179042082..27f16af51c5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -5408,6 +5408,44 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod - } - } - -+void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) -+{ -+ bool progress; -+ -+ lower_ir(ctx, lower_matrix_swizzles, body); -+ lower_ir(ctx, lower_index_loads, body); -+ -+ lower_ir(ctx, lower_broadcasts, body); -+ while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); -+ do -+ { -+ progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); -+ progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); -+ } -+ while (progress); -+ hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); -+ -+ lower_ir(ctx, lower_narrowing_casts, body); -+ lower_ir(ctx, lower_int_dot, body); -+ lower_ir(ctx, lower_int_division, body); -+ lower_ir(ctx, lower_int_modulus, body); -+ lower_ir(ctx, lower_int_abs, body); -+ lower_ir(ctx, lower_casts_to_bool, body); -+ lower_ir(ctx, lower_float_modulus, body); -+ hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); -+ -+ do -+ { -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, body); -+ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -+ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); -+ progress |= 
hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); -+ } while (progress); -+} -+ - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) - { -@@ -5416,7 +5454,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - struct recursive_call_ctx recursive_call_ctx; - struct hlsl_ir_var *var; - unsigned int i; -- bool progress; - - list_move_head(&body->instrs, &ctx->static_initializers.instrs); - -@@ -5494,35 +5531,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - { - hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); - } -- lower_ir(ctx, lower_broadcasts, body); -- while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); -- do -- { -- progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); -- progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); -- } -- while (progress); -- hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); - -- lower_ir(ctx, lower_narrowing_casts, body); -- lower_ir(ctx, lower_int_dot, body); -- lower_ir(ctx, lower_int_division, body); -- lower_ir(ctx, lower_int_modulus, body); -- lower_ir(ctx, lower_int_abs, body); -- lower_ir(ctx, lower_casts_to_bool, body); -- lower_ir(ctx, lower_float_modulus, body); -- hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); -- do -- { -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); -- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); -- progress |= hlsl_copy_propagation_execute(ctx, body); -- progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -- progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); -- progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); -- } -- while 
(progress); -+ hlsl_run_const_passes(ctx, body); -+ - remove_unreachable_code(ctx, body); - hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL); - --- -2.43.0 -