diff --git a/patches/vkd3d-latest/0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch b/patches/vkd3d-latest/0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch index fc2dd35f..cac64864 100644 --- a/patches/vkd3d-latest/0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch +++ b/patches/vkd3d-latest/0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch @@ -1,7 +1,7 @@ -From 226088587d4ba04bd8f9ee05b300ce7d03377187 Mon Sep 17 00:00:00 2001 +From de371a0eaab2304c0c9f1facb9941d712949686d Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 17 May 2023 08:35:40 +1000 -Subject: [PATCH 1/3] Update vkd3d to 771e442af16228a977eebba82224f06f6d0202fe +Subject: [PATCH] Update vkd3d to 771e442af16228a977eebba82224f06f6d0202fe (1.8) --- diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-2a3413e0f01524f2068bce12100906eb220.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-2a3413e0f01524f2068bce12100906eb220.patch deleted file mode 100644 index 75b39b31..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-2a3413e0f01524f2068bce12100906eb220.patch +++ /dev/null @@ -1,9440 +0,0 @@ -From ef302da9a91d5d7d4456506303ce3f3964f9081f Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 28 Jun 2023 16:27:03 +1000 -Subject: [PATCH 2/3] Updated vkd3d to - 2a3413e0f01524f2068bce12100906eb2200c965. 
- ---- - include/d3d12.idl | 4 +- - libs/vkd3d/Makefile.in | 1 + - libs/vkd3d/include/private/vkd3d_common.h | 7 + - .../include/private/vkd3d_shader_utils.h | 63 + - libs/vkd3d/include/vkd3d.h | 35 + - libs/vkd3d/include/vkd3d_shader.h | 116 +- - libs/vkd3d/libs/vkd3d-common/debug.c | 17 +- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 48 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 311 ++- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 16 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 2313 +++++++++++++++++ - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 161 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 32 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1323 ++++++---- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 384 ++- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 363 ++- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 26 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 372 ++- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 164 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 29 + - libs/vkd3d/libs/vkd3d/command.c | 222 +- - libs/vkd3d/libs/vkd3d/device.c | 2 + - libs/vkd3d/libs/vkd3d/resource.c | 51 +- - libs/vkd3d/libs/vkd3d/state.c | 10 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 47 +- - 25 files changed, 5088 insertions(+), 1029 deletions(-) - create mode 100644 libs/vkd3d/include/private/vkd3d_shader_utils.h - create mode 100644 libs/vkd3d/libs/vkd3d-shader/dxil.c - -diff --git a/include/d3d12.idl b/include/d3d12.idl -index 4fec32d2656..c6064939e1f 100644 ---- a/include/d3d12.idl -+++ b/include/d3d12.idl -@@ -2243,8 +2243,8 @@ interface ID3D12CommandQueue : ID3D12Pageable - ID3D12Heap *heap, - UINT range_count, - const D3D12_TILE_RANGE_FLAGS *range_flags, -- UINT *heap_range_offsets, -- UINT *range_tile_counts, -+ const UINT *heap_range_offsets, -+ const UINT *range_tile_counts, - D3D12_TILE_MAPPING_FLAGS flags); - - void CopyTileMappings(ID3D12Resource *dst_resource, -diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in -index 1ba0e9f71e1..f647af11d07 100644 ---- 
a/libs/vkd3d/Makefile.in -+++ b/libs/vkd3d/Makefile.in -@@ -17,6 +17,7 @@ SOURCES = \ - libs/vkd3d-shader/d3d_asm.c \ - libs/vkd3d-shader/d3dbc.c \ - libs/vkd3d-shader/dxbc.c \ -+ libs/vkd3d-shader/dxil.c \ - libs/vkd3d-shader/glsl.c \ - libs/vkd3d-shader/hlsl.c \ - libs/vkd3d-shader/hlsl.l \ -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 1ac23b4a085..0263fc47297 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -20,6 +20,7 @@ - #define __VKD3D_COMMON_H - - #include "config.h" -+#define WIN32_LEAN_AND_MEAN - #include "windows.h" - #include "vkd3d_types.h" - -@@ -28,6 +29,7 @@ - #include - #include - #include -+#include - - #ifdef _MSC_VER - #include -@@ -171,6 +173,11 @@ static inline bool vkd3d_bound_range(size_t start, size_t count, size_t limit) - #endif - } - -+static inline bool vkd3d_object_range_overflow(size_t start, size_t count, size_t size) -+{ -+ return (~(size_t)0 - start) / size < count; -+} -+ - static inline uint16_t vkd3d_make_u16(uint8_t low, uint8_t high) - { - return low | ((uint16_t)high << 8); -diff --git a/libs/vkd3d/include/private/vkd3d_shader_utils.h b/libs/vkd3d/include/private/vkd3d_shader_utils.h -new file mode 100644 -index 00000000000..00052a89988 ---- /dev/null -+++ b/libs/vkd3d/include/private/vkd3d_shader_utils.h -@@ -0,0 +1,63 @@ -+/* -+ * Copyright 2023 Conor McCarthy for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#ifndef __VKD3D_SHADER_UTILS_H -+#define __VKD3D_SHADER_UTILS_H -+ -+#include "vkd3d_shader.h" -+ -+#define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') -+#define TAG_SHDR VKD3D_MAKE_TAG('S', 'H', 'D', 'R') -+#define TAG_SHEX VKD3D_MAKE_TAG('S', 'H', 'E', 'X') -+ -+static inline enum vkd3d_result vkd3d_shader_parse_dxbc_source_type(const struct vkd3d_shader_code *dxbc, -+ enum vkd3d_shader_source_type *type, char **messages) -+{ -+ struct vkd3d_shader_dxbc_desc desc; -+ enum vkd3d_result ret; -+ unsigned int i; -+ -+ *type = VKD3D_SHADER_SOURCE_NONE; -+ -+ if ((ret = vkd3d_shader_parse_dxbc(dxbc, 0, &desc, messages)) < 0) -+ return ret; -+ -+ for (i = 0; i < desc.section_count; ++i) -+ { -+ uint32_t tag = desc.sections[i].tag; -+ if (tag == TAG_SHDR || tag == TAG_SHEX) -+ { -+ *type = VKD3D_SHADER_SOURCE_DXBC_TPF; -+ } -+ else if (tag == TAG_DXIL) -+ { -+ *type = VKD3D_SHADER_SOURCE_DXBC_DXIL; -+ /* Default to DXIL if both are present. */ -+ break; -+ } -+ } -+ -+ vkd3d_shader_free_dxbc(&desc); -+ -+ if (*type == VKD3D_SHADER_SOURCE_NONE) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ return VKD3D_OK; -+} -+ -+#endif /* __VKD3D_SHADER_UTILS_H */ -diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h -index 72ed3ced671..2ccda47248a 100644 ---- a/libs/vkd3d/include/vkd3d.h -+++ b/libs/vkd3d/include/vkd3d.h -@@ -207,7 +207,42 @@ VKD3D_API VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device); - VKD3D_API struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device); - - VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); -+ -+/** -+ * Acquire the Vulkan queue backing a command queue. 
-+ * -+ * While a queue is acquired by the client, it is locked so that -+ * neither the vkd3d library nor other threads can submit work to -+ * it. For that reason it should be released as soon as possible with -+ * vkd3d_release_vk_queue(). The lock is not reentrant, so the same -+ * queue must not be acquired more than once by the same thread. -+ * -+ * Work submitted through the Direct3D 12 API exposed by vkd3d is not -+ * always immediately submitted to the Vulkan queue; sometimes it is -+ * kept in another internal queue, which might not necessarily be -+ * empty at the time vkd3d_acquire_vk_queue() is called. For this -+ * reason, work submitted directly to the Vulkan queue might appear to -+ * the Vulkan driver as being submitted before other work submitted -+ * though the Direct3D 12 API. If this is not desired, it is -+ * recommended to synchronize work submission using an ID3D12Fence -+ * object, by submitting to the queue a signal operation after all the -+ * Direct3D 12 work is submitted and waiting for it before calling -+ * vkd3d_acquire_vk_queue(). -+ * -+ * \since 1.0 -+ */ - VKD3D_API VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue); -+ -+/** -+ * Release the Vulkan queue backing a command queue. -+ * -+ * This must be paired to an earlier corresponding -+ * vkd3d_acquire_vk_queue(). After this function is called, the Vulkan -+ * queue returned by vkd3d_acquire_vk_queue() must not be used any -+ * more. 
-+ * -+ * \since 1.0 -+ */ - VKD3D_API void vkd3d_release_vk_queue(ID3D12CommandQueue *queue); - - VKD3D_API HRESULT vkd3d_create_image_resource(ID3D12Device *device, -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 274241546ea..6c17a07b9d2 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -85,6 +85,11 @@ enum vkd3d_shader_structure_type - * \since 1.3 - */ - VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO, -+ /** -+ * The structure is a vkd3d_shader_scan_signature_info structure. -+ * \since 1.9 -+ */ -+ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), - }; -@@ -611,6 +616,11 @@ enum vkd3d_shader_source_type - * model 1, 2, and 3 shaders. \since 1.3 - */ - VKD3D_SHADER_SOURCE_D3D_BYTECODE, -+ /** -+ * A 'DirectX Intermediate Language' shader embedded in a DXBC container. This is -+ * the format used for Direct3D shader model 6 shaders. \since 1.9 -+ */ -+ VKD3D_SHADER_SOURCE_DXBC_DXIL, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), - }; -@@ -620,7 +630,7 @@ enum vkd3d_shader_target_type - { - /** - * The shader has no type or is to be ignored. This is not a valid value -- * for vkd3d_shader_compile() or vkd3d_shader_scan(). -+ * for vkd3d_shader_compile(). - */ - VKD3D_SHADER_TARGET_NONE, - /** -@@ -1551,6 +1561,64 @@ static inline uint32_t vkd3d_shader_create_swizzle(enum vkd3d_shader_swizzle_com - | ((w & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(3)); - } - -+/** -+ * A chained structure containing descriptions of shader inputs and outputs. -+ * -+ * This structure is currently implemented only for DXBC and legacy D3D bytecode -+ * source types. -+ * For DXBC shaders, the returned information is parsed directly from the -+ * signatures embedded in the DXBC shader. -+ * For legacy D3D shaders, the returned information is synthesized based on -+ * registers declared or used by shader instructions. 
-+ * For all other shader types, the structure is zeroed. -+ * -+ * All members (except for \ref type and \ref next) are output-only. -+ * -+ * This structure is passed to vkd3d_shader_scan() and extends -+ * vkd3d_shader_compile_info. -+ * -+ * Members of this structure are allocated by vkd3d-shader and should be freed -+ * with vkd3d_shader_free_scan_signature_info() when no longer needed. -+ * -+ * All signatures may contain pointers into the input shader, and should only -+ * be accessed while the input shader remains valid. -+ * -+ * Signature elements are synthesized from legacy Direct3D bytecode as follows: -+ * - The \ref vkd3d_shader_signature_element.semantic_name field is set to an -+ * uppercase string corresponding to the HLSL name for the usage, e.g. -+ * "POSITION", "BLENDWEIGHT", "COLOR", "PSIZE", etc. -+ * - The \ref vkd3d_shader_signature_element.semantic_index field is set to the -+ * usage index. -+ * - The \ref vkd3d_shader_signature_element.stream_index is always 0. -+ * -+ * Signature elements are synthesized for any input or output register declared -+ * or used in a legacy Direct3D bytecode shader, including the following: -+ * - Shader model 1 and 2 colour and texture coordinate registers. -+ * - The shader model 1 pixel shader output register. -+ * - Shader model 1 and 2 vertex shader output registers (position, fog, and -+ * point size). -+ * - Shader model 3 pixel shader system value input registers (pixel position -+ * and face). -+ * -+ * \since 1.9 -+ */ -+struct vkd3d_shader_scan_signature_info -+{ -+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO. */ -+ enum vkd3d_shader_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ -+ const void *next; -+ -+ /** The shader input varyings. */ -+ struct vkd3d_shader_signature input; -+ -+ /** The shader output varyings. */ -+ struct vkd3d_shader_signature output; -+ -+ /** The shader patch constant varyings. 
*/ -+ struct vkd3d_shader_signature patch_constant; -+}; -+ - #ifdef LIBVKD3D_SHADER_SOURCE - # define VKD3D_SHADER_API VKD3D_EXPORT - #else -@@ -1625,6 +1693,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported - * following chained structures: - * - vkd3d_shader_interface_info - * - vkd3d_shader_scan_descriptor_info -+ * - vkd3d_shader_scan_signature_info - * - vkd3d_shader_spirv_domain_shader_target_info - * - vkd3d_shader_spirv_target_info - * - vkd3d_shader_transform_feedback_info -@@ -1784,6 +1853,26 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver - * Parse shader source code or byte code, returning various types of requested - * information. - * -+ * The \a source_type member of \a compile_info must be set to the type of the -+ * shader. -+ * -+ * The \a target_type member may be set to VKD3D_SHADER_TARGET_NONE, in which -+ * case vkd3d_shader_scan() will return information about the shader in -+ * isolation. Alternatively, it may be set to a valid compilation target for the -+ * shader, in which case vkd3d_shader_scan() will return information that -+ * reflects the interface for a shader as it will be compiled to that target. -+ * In this case other chained structures may be appended to \a compile_info as -+ * they would be passed to vkd3d_shader_compile(), and interpreted accordingly, -+ * such as vkd3d_shader_spirv_target_info. -+ * -+ * (For a hypothetical example, suppose the source shader distinguishes float -+ * and integer texture data, but the target environment does not support integer -+ * textures. In this case vkd3d_shader_compile() might translate integer -+ * operations to float. Accordingly using VKD3D_SHADER_TARGET_NONE would -+ * accurately report whether the texture expects integer or float data, but -+ * using the relevant specific target type would report -+ * VKD3D_SHADER_RESOURCE_DATA_FLOAT.) 
-+ * - * Currently this function supports the following code types: - * - VKD3D_SHADER_SOURCE_DXBC_TPF - * -@@ -1791,6 +1880,7 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver - * \n - * The DXBC_TPF scanner supports the following chained structures: - * - vkd3d_shader_scan_descriptor_info -+ * - vkd3d_shader_scan_signature_info - * \n - * Although the \a compile_info parameter is read-only, chained structures - * passed to this function need not be, and may serve as output parameters, -@@ -1827,12 +1917,18 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info); - - /** -- * Read the input signature of a compiled shader, returning a structural -+ * Read the input signature of a compiled DXBC shader, returning a structural - * description which can be easily parsed by C code. - * - * This function parses a compiled shader. To parse a standalone root signature, - * use vkd3d_shader_parse_root_signature(). - * -+ * This function only parses DXBC shaders, and only retrieves the input -+ * signature. To retrieve signatures from other shader types, or other signature -+ * types, use vkd3d_shader_scan() and struct vkd3d_shader_scan_signature_info. -+ * This function returns the same input signature that is returned in -+ * struct vkd3d_shader_scan_signature_info. -+ * - * \param dxbc Compiled byte code, in DXBC format. - * - * \param signature Output location in which the parsed root signature will be -@@ -2022,6 +2118,19 @@ VKD3D_SHADER_API int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxb - VKD3D_SHADER_API int vkd3d_shader_serialize_dxbc(size_t section_count, - const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); - -+/** -+ * Free members of struct vkd3d_shader_scan_signature_info allocated by -+ * vkd3d_shader_scan(). 
-+ * -+ * This function may free members of vkd3d_shader_scan_signature_info, but -+ * does not free the structure itself. -+ * -+ * \param info Scan information to free. -+ * -+ * \since 1.9 -+ */ -+VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info); -+ - #endif /* VKD3D_SHADER_NO_PROTOTYPES */ - - /** Type of vkd3d_shader_get_version(). */ -@@ -2087,6 +2196,9 @@ typedef int (*PFN_vkd3d_shader_parse_dxbc)(const struct vkd3d_shader_code *dxbc, - typedef int (*PFN_vkd3d_shader_serialize_dxbc)(size_t section_count, - const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); - -+/** Type of vkd3d_shader_free_scan_signature_info(). \since 1.9 */ -+typedef void (*PFN_vkd3d_shader_free_scan_signature_info)(struct vkd3d_shader_scan_signature_info *info); -+ - #ifdef __cplusplus - } - #endif /* __cplusplus */ -diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c -index b363efbd360..aa7df5bd764 100644 ---- a/libs/vkd3d/libs/vkd3d-common/debug.c -+++ b/libs/vkd3d/libs/vkd3d-common/debug.c -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include - #ifdef HAVE_PTHREAD_H - #include - #endif -@@ -44,11 +45,11 @@ extern const char *const vkd3d_dbg_env_name; - - static const char *const debug_level_names[] = - { -- /* VKD3D_DBG_LEVEL_NONE */ "none", -- /* VKD3D_DBG_LEVEL_ERR */ "err", -- /* VKD3D_DBG_LEVEL_FIXME */ "fixme", -- /* VKD3D_DBG_LEVEL_WARN */ "warn", -- /* VKD3D_DBG_LEVEL_TRACE */ "trace", -+ [VKD3D_DBG_LEVEL_NONE ] = "none", -+ [VKD3D_DBG_LEVEL_ERR ] = "err", -+ [VKD3D_DBG_LEVEL_FIXME] = "fixme", -+ [VKD3D_DBG_LEVEL_WARN ] = "warn", -+ [VKD3D_DBG_LEVEL_TRACE] = "trace", - }; - - enum vkd3d_dbg_level vkd3d_dbg_get_level(void) -@@ -105,7 +106,13 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch - - assert(level < ARRAY_SIZE(debug_level_names)); - -+#ifdef _WIN32 -+ 
vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); -+#elif HAVE_GETTID -+ vkd3d_dbg_output("vkd3d:%u:%s:%s ", gettid(), debug_level_names[level], function); -+#else - vkd3d_dbg_output("vkd3d:%s:%s ", debug_level_names[level], function); -+#endif - va_start(args, fmt); - vkd3d_dbg_voutput(fmt, args); - va_end(args); -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 0a821b5c878..d72402eb250 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -578,17 +578,17 @@ static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, e - { - static const char *const resource_type_names[] = - { -- /* VKD3D_SHADER_RESOURCE_NONE */ "none", -- /* VKD3D_SHADER_RESOURCE_BUFFER */ "buffer", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ "texture1d", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ "texture2d", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ "texture2dms", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ "texture3d", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ "texturecube", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ "texture1darray", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ "texture2darray", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ "texture2dmsarray", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ "texturecubearray", -+ [VKD3D_SHADER_RESOURCE_NONE ] = "none", -+ [VKD3D_SHADER_RESOURCE_BUFFER ] = "buffer", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_1D ] = "texture1d", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2D ] = "texture2d", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS ] = "texture2dms", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_3D ] = "texture3d", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE ] = "texturecube", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY ] = "texture1darray", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY ] = "texture2darray", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = "texture2dmsarray", -+ 
[VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = "texturecubearray", - }; - - if (type < ARRAY_SIZE(resource_type_names)) -@@ -601,19 +601,19 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, const - { - static const char *const data_type_names[] = - { -- /* VKD3D_DATA_FLOAT */ "float", -- /* VKD3D_DATA_INT */ "int", -- /* VKD3D_DATA_RESOURCE */ "resource", -- /* VKD3D_DATA_SAMPLER */ "sampler", -- /* VKD3D_DATA_UAV */ "uav", -- /* VKD3D_DATA_UINT */ "uint", -- /* VKD3D_DATA_UNORM */ "unorm", -- /* VKD3D_DATA_SNORM */ "snorm", -- /* VKD3D_DATA_OPAQUE */ "opaque", -- /* VKD3D_DATA_MIXED */ "mixed", -- /* VKD3D_DATA_DOUBLE */ "double", -- /* VKD3D_DATA_CONTINUED */ "", -- /* VKD3D_DATA_UNUSED */ "", -+ [VKD3D_DATA_FLOAT ] = "float", -+ [VKD3D_DATA_INT ] = "int", -+ [VKD3D_DATA_RESOURCE ] = "resource", -+ [VKD3D_DATA_SAMPLER ] = "sampler", -+ [VKD3D_DATA_UAV ] = "uav", -+ [VKD3D_DATA_UINT ] = "uint", -+ [VKD3D_DATA_UNORM ] = "unorm", -+ [VKD3D_DATA_SNORM ] = "snorm", -+ [VKD3D_DATA_OPAQUE ] = "opaque", -+ [VKD3D_DATA_MIXED ] = "mixed", -+ [VKD3D_DATA_DOUBLE ] = "double", -+ [VKD3D_DATA_CONTINUED] = "", -+ [VKD3D_DATA_UNUSED ] = "", - }; - const char *name; - int i; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 712613ac13b..369112ce18d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -260,9 +260,9 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = - /* Declarations */ - {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, - /* Constant definitions */ -- {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, -+ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, - {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, -- {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, -+ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, - /* Control flow */ - {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, -@@ -327,9 +327,9 @@ static 
const struct vkd3d_sm1_opcode_info ps_opcode_table[] = - /* Declarations */ - {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, - /* Constant definitions */ -- {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, -+ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, - {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, -- {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, -+ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, - /* Control flow */ - {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, -@@ -490,6 +490,255 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader - dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; - } - -+static struct signature_element *find_signature_element(const struct shader_signature *signature, -+ const char *semantic_name, unsigned int semantic_index) -+{ -+ struct signature_element *e = signature->elements; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ if (!ascii_strcasecmp(e[i].semantic_name, semantic_name) -+ && e[i].semantic_index == semantic_index) -+ return &e[i]; -+ } -+ -+ return NULL; -+} -+ -+static struct signature_element *find_signature_element_by_register_index( -+ const struct shader_signature *signature, unsigned int register_index) -+{ -+ struct signature_element *e = signature->elements; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ if (e[i].register_index == register_index) -+ return &e[i]; -+ } -+ -+ return NULL; -+} -+ -+static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, -+ const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, -+ unsigned int register_index, bool is_dcl, unsigned int mask) -+{ -+ struct shader_signature *signature; -+ struct signature_element *element; -+ -+ if (output) -+ signature = &sm1->p.shader_desc.output_signature; -+ else -+ signature = &sm1->p.shader_desc.input_signature; -+ -+ if ((element = 
find_signature_element(signature, name, index))) -+ { -+ element->mask |= mask; -+ if (!is_dcl) -+ element->used_mask |= mask; -+ return true; -+ } -+ -+ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, -+ signature->element_count + 1, sizeof(*signature->elements))) -+ return false; -+ element = &signature->elements[signature->element_count++]; -+ -+ element->semantic_name = name; -+ element->semantic_index = index; -+ element->stream_index = 0; -+ element->sysval_semantic = sysval; -+ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ element->register_index = register_index; -+ element->register_count = 1; -+ element->mask = mask; -+ element->used_mask = is_dcl ? 0 : mask; -+ element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; -+ -+ return true; -+} -+ -+static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, -+ unsigned int register_index, unsigned int mask) -+{ -+ struct shader_signature *signature; -+ struct signature_element *element; -+ -+ if (output) -+ signature = &sm1->p.shader_desc.output_signature; -+ else -+ signature = &sm1->p.shader_desc.input_signature; -+ -+ if (!(element = find_signature_element_by_register_index(signature, register_index))) -+ { -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC, -+ "%s register %u was used without being declared.", output ? 
"Output" : "Input", register_index); -+ return; -+ } -+ -+ element->used_mask |= mask; -+} -+ -+static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, -+ const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) -+{ -+ unsigned int register_index = reg->idx[0].offset; -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_TEMP: -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL -+ && sm1->p.shader_version.major == 1 && !register_index) -+ return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_NONE, 0, is_dcl, mask); -+ return true; -+ -+ case VKD3DSPR_INPUT: -+ /* For vertex shaders or sm3 pixel shaders, we should have already -+ * had a DCL instruction. Otherwise, this is a colour input. */ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX || sm1->p.shader_version.major == 3) -+ { -+ add_signature_mask(sm1, false, register_index, mask); -+ return true; -+ } -+ return add_signature_element(sm1, false, "COLOR", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ -+ case VKD3DSPR_TEXTURE: -+ /* For vertex shaders, this is ADDR. */ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) -+ return true; -+ return add_signature_element(sm1, false, "TEXCOORD", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ -+ case VKD3DSPR_OUTPUT: -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) -+ { -+ /* For sm < 2 vertex shaders, this is TEXCRDOUT. -+ * -+ * For sm3 vertex shaders, this is OUTPUT, but we already -+ * should have had a DCL instruction. 
*/ -+ if (sm1->p.shader_version.major == 3) -+ { -+ add_signature_mask(sm1, true, register_index, mask); -+ return true; -+ } -+ return add_signature_element(sm1, true, "TEXCOORD", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ } -+ /* fall through */ -+ -+ case VKD3DSPR_ATTROUT: -+ case VKD3DSPR_COLOROUT: -+ return add_signature_element(sm1, true, "COLOR", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ -+ case VKD3DSPR_DEPTHOUT: -+ return add_signature_element(sm1, true, "DEPTH", 0, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); -+ -+ case VKD3DSPR_RASTOUT: -+ switch (register_index) -+ { -+ case 0: -+ return add_signature_element(sm1, true, "POSITION", 0, -+ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); -+ -+ case 1: -+ return add_signature_element(sm1, true, "FOG", 0, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); -+ -+ case 2: -+ return add_signature_element(sm1, true, "PSIZE", 0, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); -+ -+ default: -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, -+ "Invalid rasterizer output index %u.", register_index); -+ return true; -+ } -+ -+ case VKD3DSPR_MISCTYPE: -+ switch (register_index) -+ { -+ case 0: -+ return add_signature_element(sm1, false, "VPOS", 0, -+ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); -+ -+ case 1: -+ return add_signature_element(sm1, false, "VFACE", 0, -+ VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1); -+ -+ default: -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, -+ "Invalid miscellaneous fragment input index %u.", register_index); -+ return true; -+ } -+ -+ default: -+ return true; -+ } -+} -+ -+static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, -+ const struct vkd3d_shader_semantic *semantic) -+{ -+ const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; -+ enum 
vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -+ unsigned int mask = semantic->resource.reg.write_mask; -+ bool output; -+ -+ static const char sm1_semantic_names[][13] = -+ { -+ [VKD3D_DECL_USAGE_POSITION ] = "POSITION", -+ [VKD3D_DECL_USAGE_BLEND_WEIGHT ] = "BLENDWEIGHT", -+ [VKD3D_DECL_USAGE_BLEND_INDICES] = "BLENDINDICES", -+ [VKD3D_DECL_USAGE_NORMAL ] = "NORMAL", -+ [VKD3D_DECL_USAGE_PSIZE ] = "PSIZE", -+ [VKD3D_DECL_USAGE_TEXCOORD ] = "TEXCOORD", -+ [VKD3D_DECL_USAGE_TANGENT ] = "TANGENT", -+ [VKD3D_DECL_USAGE_BINORMAL ] = "BINORMAL", -+ [VKD3D_DECL_USAGE_TESS_FACTOR ] = "TESSFACTOR", -+ [VKD3D_DECL_USAGE_POSITIONT ] = "POSITIONT", -+ [VKD3D_DECL_USAGE_COLOR ] = "COLOR", -+ [VKD3D_DECL_USAGE_FOG ] = "FOG", -+ [VKD3D_DECL_USAGE_DEPTH ] = "DEPTH", -+ [VKD3D_DECL_USAGE_SAMPLE ] = "SAMPLE", -+ }; -+ -+ if (reg->type == VKD3DSPR_OUTPUT) -+ output = true; -+ else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) -+ output = false; -+ else /* vpos and vface don't have a semantic. */ -+ return add_signature_element_from_register(sm1, reg, true, mask); -+ -+ /* sm2 pixel shaders use DCL but don't provide a semantic. */ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && sm1->p.shader_version.major == 2) -+ return add_signature_element_from_register(sm1, reg, true, mask); -+ -+ /* With the exception of vertex POSITION output, none of these are system -+ * values. Pixel POSITION input is not equivalent to SV_Position; the closer -+ * equivalent is VPOS, which is not declared as a semantic. 
*/ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX -+ && output && semantic->usage == VKD3D_DECL_USAGE_POSITION) -+ sysval = VKD3D_SHADER_SV_POSITION; -+ -+ return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], -+ semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); -+} -+ -+static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, const struct vkd3d_shader_register *reg, unsigned int mask) -+{ -+ uint32_t register_index = reg->idx[0].offset; -+ -+ if (reg->type == VKD3DSPR_TEMP) -+ sm1->p.shader_desc.temp_count = max(sm1->p.shader_desc.temp_count, register_index + 1); -+ -+ add_signature_element_from_register(sm1, reg, false, mask); -+} -+ - /* Read a parameter token from the input stream, and possibly a relative - * addressing token. */ - static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, -@@ -640,6 +889,8 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, - range = &semantic->resource.range; - range->space = 0; - range->first = range->last = semantic->resource.reg.reg.idx[0].offset; -+ -+ add_signature_element_from_semantic(sm1, semantic); - } - - static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, -@@ -744,6 +995,14 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, - } - } - -+static unsigned int mask_from_swizzle(unsigned int swizzle) -+{ -+ return (1u << vkd3d_swizzle_get_component(swizzle, 0)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 1)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 2)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 3)); -+} -+ - static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) - { - struct vkd3d_shader_src_param *src_params, *predicate; -@@ -832,7 +1091,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - { - /* Destination token */ - if 
(ins->dst_count) -+ { - shader_sm1_read_dst_param(sm1, &p, dst_param); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask); -+ } - - /* Predication token */ - if (ins->predicate) -@@ -840,7 +1102,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - - /* Other source tokens */ - for (i = 0; i < ins->src_count; ++i) -+ { - shader_sm1_read_src_param(sm1, &p, &src_params[i]); -+ shader_sm1_scan_register(sm1, &src_params[i].reg, mask_from_swizzle(src_params[i].swizzle)); -+ } - } - - if (sm1->abort) -@@ -1553,12 +1818,13 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - - for (i = 0; i < ctx->constant_defs.count; ++i) - { -+ const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; - uint32_t token = D3DSIO_DEF; - const struct sm1_dst_register reg = - { - .type = D3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, -- .reg = i, -+ .reg = constant_reg->index, - }; - - if (ctx->profile->major_version > 1) -@@ -1567,7 +1833,7 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - - write_sm1_dst_register(buffer, ®); - for (x = 0; x < 4; ++x) -- put_f32(buffer, ctx->constant_defs.values[i].f[x]); -+ put_f32(buffer, constant_reg->value.f[x]); - } - } - -@@ -1844,6 +2110,35 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - } - -+static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -+ -+ switch (jump->type) -+ { -+ case HLSL_IR_JUMP_DISCARD_NEG: -+ { -+ struct hlsl_reg *reg = &jump->condition.node->reg; -+ -+ struct sm1_instruction instr = -+ { -+ .opcode = VKD3D_SM1_OP_TEXKILL, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.reg = reg->id, -+ .dst.writemask = reg->writemask, -+ .has_dst = 1, -+ }; -+ -+ write_sm1_instruction(ctx, buffer, &instr); -+ break; -+ } -+ -+ 
default: -+ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); -+ } -+} -+ - static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_load *load = hlsl_ir_load(instr); -@@ -2038,6 +2333,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - write_sm1_expr(ctx, buffer, instr); - break; - -+ case HLSL_IR_JUMP: -+ write_sm1_jump(ctx, buffer, instr); -+ break; -+ - case HLSL_IR_LOAD: - write_sm1_load(ctx, buffer, instr); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index 3e3f06faeb5..716b7bdb721 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -493,8 +493,14 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, - return ret; - break; - -+ case TAG_DXIL: - case TAG_SHDR: - case TAG_SHEX: -+ if ((section->tag == TAG_DXIL) != desc->is_dxil) -+ { -+ TRACE("Skipping chunk %#x.\n", section->tag); -+ break; -+ } - if (desc->byte_code) - FIXME("Multiple shader code chunks.\n"); - desc->byte_code = section->data.code; -@@ -505,10 +511,6 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, - TRACE("Skipping AON9 shader code chunk.\n"); - break; - -- case TAG_DXIL: -- FIXME("Skipping DXIL shader model 6+ code chunk.\n"); -- break; -- - default: - TRACE("Skipping chunk %#x.\n", section->tag); - break; -@@ -529,12 +531,6 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - { - int ret; - -- desc->byte_code = NULL; -- desc->byte_code_size = 0; -- memset(&desc->input_signature, 0, sizeof(desc->input_signature)); -- memset(&desc->output_signature, 0, sizeof(desc->output_signature)); -- memset(&desc->patch_constant_signature, 0, sizeof(desc->patch_constant_signature)); -- - ret = for_each_dxbc_section(dxbc, message_context, source_name, shdr_handler, 
desc); - if (!desc->byte_code) - ret = VKD3D_ERROR_INVALID_ARGUMENT; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -new file mode 100644 -index 00000000000..53a4c2da4ba ---- /dev/null -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -0,0 +1,2313 @@ -+/* -+ * Copyright 2023 Conor McCarthy for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#include "vkd3d_shader_private.h" -+ -+#define VKD3D_SM6_VERSION_MAJOR(version) (((version) >> 4) & 0xf) -+#define VKD3D_SM6_VERSION_MINOR(version) (((version) >> 0) & 0xf) -+ -+#define BITCODE_MAGIC VKD3D_MAKE_TAG('B', 'C', 0xc0, 0xde) -+ -+enum bitcode_block_id -+{ -+ BLOCKINFO_BLOCK = 0, -+ MODULE_BLOCK = 8, -+ PARAMATTR_BLOCK = 9, -+ PARAMATTR_GROUP_BLOCK = 10, -+ CONSTANTS_BLOCK = 11, -+ FUNCTION_BLOCK = 12, -+ VALUE_SYMTAB_BLOCK = 14, -+ METADATA_BLOCK = 15, -+ METADATA_ATTACHMENT_BLOCK = 16, -+ TYPE_BLOCK = 17, -+ USELIST_BLOCK = 18, -+}; -+ -+enum bitcode_blockinfo_code -+{ -+ SETBID = 1, -+ BLOCKNAME = 2, -+ SETRECORDNAME = 3, -+}; -+ -+enum bitcode_block_abbreviation -+{ -+ END_BLOCK = 0, -+ ENTER_SUBBLOCK = 1, -+ DEFINE_ABBREV = 2, -+ UNABBREV_RECORD = 3, -+}; -+ -+enum bitcode_abbrev_type -+{ -+ ABBREV_FIXED = 1, -+ ABBREV_VBR = 2, -+ ABBREV_ARRAY = 3, 
-+ ABBREV_CHAR = 4, -+ ABBREV_BLOB = 5, -+}; -+ -+enum bitcode_address_space -+{ -+ ADDRESS_SPACE_DEFAULT, -+ ADDRESS_SPACE_DEVICEMEM, -+ ADDRESS_SPACE_CBUFFER, -+ ADDRESS_SPACE_GROUPSHARED, -+}; -+ -+enum bitcode_module_code -+{ -+ MODULE_CODE_VERSION = 1, -+ MODULE_CODE_GLOBALVAR = 7, -+ MODULE_CODE_FUNCTION = 8, -+}; -+ -+enum bitcode_constant_code -+{ -+ CST_CODE_SETTYPE = 1, -+ CST_CODE_NULL = 2, -+ CST_CODE_UNDEF = 3, -+ CST_CODE_INTEGER = 4, -+ CST_CODE_FLOAT = 6, -+ CST_CODE_STRING = 8, -+ CST_CODE_CE_GEP = 12, -+ CST_CODE_CE_INBOUNDS_GEP = 20, -+ CST_CODE_DATA = 22, -+}; -+ -+enum bitcode_function_code -+{ -+ FUNC_CODE_DECLAREBLOCKS = 1, -+ FUNC_CODE_INST_BINOP = 2, -+ FUNC_CODE_INST_CAST = 3, -+ FUNC_CODE_INST_RET = 10, -+ FUNC_CODE_INST_BR = 11, -+ FUNC_CODE_INST_SWITCH = 12, -+ FUNC_CODE_INST_PHI = 16, -+ FUNC_CODE_INST_ALLOCA = 19, -+ FUNC_CODE_INST_LOAD = 20, -+ FUNC_CODE_INST_EXTRACTVAL = 26, -+ FUNC_CODE_INST_CMP2 = 28, -+ FUNC_CODE_INST_VSELECT = 29, -+ FUNC_CODE_INST_CALL = 34, -+ FUNC_CODE_INST_ATOMICRMW = 38, -+ FUNC_CODE_INST_LOADATOMIC = 41, -+ FUNC_CODE_INST_GEP = 43, -+ FUNC_CODE_INST_STORE = 44, -+ FUNC_CODE_INST_STOREATOMIC = 45, -+ FUNC_CODE_INST_CMPXCHG = 46, -+}; -+ -+enum bitcode_type_code -+{ -+ TYPE_CODE_NUMENTRY = 1, -+ TYPE_CODE_VOID = 2, -+ TYPE_CODE_FLOAT = 3, -+ TYPE_CODE_DOUBLE = 4, -+ TYPE_CODE_LABEL = 5, -+ TYPE_CODE_INTEGER = 7, -+ TYPE_CODE_POINTER = 8, -+ TYPE_CODE_HALF = 10, -+ TYPE_CODE_ARRAY = 11, -+ TYPE_CODE_VECTOR = 12, -+ TYPE_CODE_METADATA = 16, -+ TYPE_CODE_STRUCT_ANON = 18, -+ TYPE_CODE_STRUCT_NAME = 19, -+ TYPE_CODE_STRUCT_NAMED = 20, -+ TYPE_CODE_FUNCTION = 21, -+}; -+ -+enum bitcode_value_symtab_code -+{ -+ VST_CODE_ENTRY = 1, -+ VST_CODE_BBENTRY = 2, -+}; -+ -+struct sm6_pointer_info -+{ -+ const struct sm6_type *type; -+ enum bitcode_address_space addr_space; -+}; -+ -+struct sm6_struct_info -+{ -+ const char *name; -+ unsigned int elem_count; -+ const struct sm6_type *elem_types[]; -+}; -+ -+struct 
sm6_function_info -+{ -+ const struct sm6_type *ret_type; -+ unsigned int param_count; -+ const struct sm6_type *param_types[]; -+}; -+ -+struct sm6_array_info -+{ -+ unsigned int count; -+ const struct sm6_type *elem_type; -+}; -+ -+enum sm6_type_class -+{ -+ TYPE_CLASS_VOID, -+ TYPE_CLASS_INTEGER, -+ TYPE_CLASS_FLOAT, -+ TYPE_CLASS_POINTER, -+ TYPE_CLASS_STRUCT, -+ TYPE_CLASS_FUNCTION, -+ TYPE_CLASS_VECTOR, -+ TYPE_CLASS_ARRAY, -+ TYPE_CLASS_LABEL, -+ TYPE_CLASS_METADATA, -+}; -+ -+struct sm6_type -+{ -+ enum sm6_type_class class; -+ union -+ { -+ unsigned int width; -+ struct sm6_pointer_info pointer; -+ struct sm6_struct_info *struc; -+ struct sm6_function_info *function; -+ struct sm6_array_info array; -+ } u; -+}; -+ -+enum sm6_value_type -+{ -+ VALUE_TYPE_FUNCTION, -+ VALUE_TYPE_REG, -+}; -+ -+struct sm6_function_data -+{ -+ const char *name; -+ bool is_prototype; -+ unsigned int attribs_id; -+}; -+ -+struct sm6_value -+{ -+ const struct sm6_type *type; -+ enum sm6_value_type value_type; -+ union -+ { -+ struct sm6_function_data function; -+ struct vkd3d_shader_register reg; -+ } u; -+}; -+ -+struct dxil_record -+{ -+ unsigned int code; -+ unsigned int operand_count; -+ uint64_t operands[]; -+}; -+ -+struct sm6_symbol -+{ -+ unsigned int id; -+ const char *name; -+}; -+ -+struct sm6_block -+{ -+ struct vkd3d_shader_instruction *instructions; -+ size_t instruction_capacity; -+ size_t instruction_count; -+}; -+ -+struct sm6_function -+{ -+ const struct sm6_value *declaration; -+ -+ struct sm6_block *blocks[1]; -+ size_t block_count; -+}; -+ -+struct dxil_block -+{ -+ const struct dxil_block *parent; -+ enum bitcode_block_id id; -+ unsigned int abbrev_len; -+ unsigned int start; -+ unsigned int length; -+ unsigned int level; -+ -+ /* The abbrev, block and record structs are not relocatable. 
*/ -+ struct dxil_abbrev **abbrevs; -+ size_t abbrev_capacity; -+ size_t abbrev_count; -+ unsigned int blockinfo_bid; -+ bool has_bid; -+ -+ struct dxil_block **child_blocks; -+ size_t child_block_capacity; -+ size_t child_block_count; -+ -+ struct dxil_record **records; -+ size_t record_capacity; -+ size_t record_count; -+}; -+ -+struct sm6_parser -+{ -+ const uint32_t *ptr, *start, *end; -+ unsigned int bitpos; -+ -+ struct dxil_block root_block; -+ struct dxil_block *current_block; -+ -+ struct dxil_global_abbrev **abbrevs; -+ size_t abbrev_capacity; -+ size_t abbrev_count; -+ -+ struct sm6_type *types; -+ size_t type_count; -+ -+ struct sm6_symbol *global_symbols; -+ size_t global_symbol_count; -+ -+ struct sm6_function *functions; -+ size_t function_count; -+ -+ struct sm6_value *values; -+ size_t value_count; -+ size_t value_capacity; -+ -+ struct vkd3d_shader_parser p; -+}; -+ -+struct dxil_abbrev_operand -+{ -+ uint64_t context; -+ bool (*read_operand)(struct sm6_parser *sm6, uint64_t context, uint64_t *operand); -+}; -+ -+struct dxil_abbrev -+{ -+ unsigned int count; -+ bool is_array; -+ struct dxil_abbrev_operand operands[]; -+}; -+ -+struct dxil_global_abbrev -+{ -+ unsigned int block_id; -+ struct dxil_abbrev abbrev; -+}; -+ -+static size_t size_add_with_overflow_check(size_t a, size_t b) -+{ -+ size_t i = a + b; -+ return (i < a) ? 
SIZE_MAX : i; -+} -+ -+static struct sm6_parser *sm6_parser(struct vkd3d_shader_parser *parser) -+{ -+ return CONTAINING_RECORD(parser, struct sm6_parser, p); -+} -+ -+static bool sm6_parser_is_end(struct sm6_parser *sm6) -+{ -+ return sm6->ptr == sm6->end; -+} -+ -+static uint32_t sm6_parser_read_uint32(struct sm6_parser *sm6) -+{ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return 0; -+ } -+ return *sm6->ptr++; -+} -+ -+static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length) -+{ -+ unsigned int l, prev_len = 0; -+ uint32_t bits; -+ -+ if (!length) -+ return 0; -+ -+ assert(length < 32); -+ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return 0; -+ } -+ -+ assert(sm6->bitpos < 32); -+ bits = *sm6->ptr >> sm6->bitpos; -+ l = 32 - sm6->bitpos; -+ if (l <= length) -+ { -+ ++sm6->ptr; -+ if (sm6_parser_is_end(sm6) && l < length) -+ { -+ sm6->p.failed = true; -+ return bits; -+ } -+ sm6->bitpos = 0; -+ bits |= *sm6->ptr << l; -+ prev_len = l; -+ } -+ sm6->bitpos += length - prev_len; -+ -+ return bits & ((1 << length) - 1); -+} -+ -+static uint64_t sm6_parser_read_vbr(struct sm6_parser *sm6, unsigned int length) -+{ -+ unsigned int bits, flag, mask, shift = 0; -+ uint64_t result = 0; -+ -+ if (!length) -+ return 0; -+ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return 0; -+ } -+ -+ flag = 1 << (length - 1); -+ mask = flag - 1; -+ do -+ { -+ bits = sm6_parser_read_bits(sm6, length); -+ result |= (uint64_t)(bits & mask) << shift; -+ shift += length - 1; -+ } while ((bits & flag) && !sm6->p.failed && shift < 64); -+ -+ sm6->p.failed |= !!(bits & flag); -+ -+ return result; -+} -+ -+static void sm6_parser_align_32(struct sm6_parser *sm6) -+{ -+ if (!sm6->bitpos) -+ return; -+ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return; -+ } -+ -+ ++sm6->ptr; -+ sm6->bitpos = 0; -+} -+ -+static bool dxil_block_handle_blockinfo_record(struct dxil_block *block, struct dxil_record 
*record) -+{ -+ /* BLOCKINFO blocks must only occur immediately below the module root block. */ -+ if (block->level > 1) -+ { -+ WARN("Invalid blockinfo block level %u.\n", block->level); -+ return false; -+ } -+ -+ switch (record->code) -+ { -+ case SETBID: -+ if (!record->operand_count) -+ { -+ WARN("Missing id operand.\n"); -+ return false; -+ } -+ if (record->operands[0] > UINT_MAX) -+ WARN("Truncating block id %"PRIu64".\n", record->operands[0]); -+ block->blockinfo_bid = record->operands[0]; -+ block->has_bid = true; -+ break; -+ case BLOCKNAME: -+ case SETRECORDNAME: -+ break; -+ default: -+ FIXME("Unhandled BLOCKINFO record type %u.\n", record->code); -+ break; -+ } -+ -+ return true; -+} -+ -+static enum vkd3d_result dxil_block_add_record(struct dxil_block *block, struct dxil_record *record) -+{ -+ unsigned int reserve; -+ -+ switch (block->id) -+ { -+ /* Rough initial reserve sizes for small shaders. */ -+ case CONSTANTS_BLOCK: reserve = 32; break; -+ case FUNCTION_BLOCK: reserve = 128; break; -+ case METADATA_BLOCK: reserve = 32; break; -+ case TYPE_BLOCK: reserve = 32; break; -+ default: reserve = 8; break; -+ } -+ reserve = max(reserve, block->record_count + 1); -+ if (!vkd3d_array_reserve((void **)&block->records, &block->record_capacity, reserve, sizeof(*block->records))) -+ { -+ ERR("Failed to allocate %u records.\n", reserve); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (block->id == BLOCKINFO_BLOCK && !dxil_block_handle_blockinfo_record(block, record)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ block->records[block->record_count++] = record; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_read_unabbrev_record(struct sm6_parser *sm6) -+{ -+ struct dxil_block *block = sm6->current_block; -+ enum vkd3d_result ret = VKD3D_OK; -+ unsigned int code, count, i; -+ struct dxil_record *record; -+ -+ code = sm6_parser_read_vbr(sm6, 6); -+ -+ count = sm6_parser_read_vbr(sm6, 6); -+ if (!(record = vkd3d_malloc(sizeof(*record) + 
count * sizeof(record->operands[0])))) -+ { -+ ERR("Failed to allocate record with %u operands.\n", count); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ record->code = code; -+ record->operand_count = count; -+ -+ for (i = 0; i < count; ++i) -+ record->operands[i] = sm6_parser_read_vbr(sm6, 6); -+ if (sm6->p.failed) -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ -+ if (ret < 0 || (ret = dxil_block_add_record(block, record)) < 0) -+ vkd3d_free(record); -+ -+ return ret; -+} -+ -+static bool sm6_parser_read_literal_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = context; -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_fixed_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = sm6_parser_read_bits(sm6, context); -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_vbr_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = sm6_parser_read_vbr(sm6, context); -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_char6_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"[sm6_parser_read_bits(sm6, 6)]; -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_blob_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ int count = sm6_parser_read_vbr(sm6, 6); -+ sm6_parser_align_32(sm6); -+ for (; count > 0; count -= 4) -+ sm6_parser_read_uint32(sm6); -+ FIXME("Unhandled blob operand.\n"); -+ return false; -+} -+ -+static enum vkd3d_result dxil_abbrev_init(struct dxil_abbrev *abbrev, unsigned int count, struct sm6_parser *sm6) -+{ -+ enum bitcode_abbrev_type prev_type, type; -+ unsigned int i; -+ -+ abbrev->is_array = false; -+ -+ for (i = 0, prev_type = 0; i < count && !sm6->p.failed; ++i) -+ { -+ if (sm6_parser_read_bits(sm6, 1)) -+ { -+ if (prev_type == ABBREV_ARRAY) -+ { -+ WARN("Unexpected literal abbreviation after array.\n"); -+ return 
VKD3D_ERROR_INVALID_SHADER; -+ } -+ abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 8); -+ abbrev->operands[i].read_operand = sm6_parser_read_literal_operand; -+ continue; -+ } -+ -+ switch (type = sm6_parser_read_bits(sm6, 3)) -+ { -+ case ABBREV_FIXED: -+ case ABBREV_VBR: -+ abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 5); -+ abbrev->operands[i].read_operand = (type == ABBREV_FIXED) ? sm6_parser_read_fixed_operand -+ : sm6_parser_read_vbr_operand; -+ break; -+ -+ case ABBREV_ARRAY: -+ if (prev_type == ABBREV_ARRAY || i != count - 2) -+ { -+ WARN("Unexpected array abbreviation.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ abbrev->is_array = true; -+ --i; -+ --count; -+ break; -+ -+ case ABBREV_CHAR: -+ abbrev->operands[i].read_operand = sm6_parser_read_char6_operand; -+ break; -+ -+ case ABBREV_BLOB: -+ if (prev_type == ABBREV_ARRAY || i != count - 1) -+ { -+ WARN("Unexpected blob abbreviation.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ abbrev->operands[i].read_operand = sm6_parser_read_blob_operand; -+ break; -+ } -+ -+ prev_type = type; -+ } -+ -+ abbrev->count = count; -+ -+ return sm6->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) -+{ -+ struct dxil_block *block = sm6->current_block; -+ unsigned int count = sm6_parser_read_vbr(sm6, 5); -+ struct dxil_global_abbrev *global_abbrev; -+ enum vkd3d_result ret; -+ -+ assert(block->id == BLOCKINFO_BLOCK); -+ -+ if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) -+ || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) -+ { -+ ERR("Failed to allocate global abbreviation.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = dxil_abbrev_init(&global_abbrev->abbrev, count, sm6)) < 0) -+ { -+ vkd3d_free(global_abbrev); -+ return ret; -+ } -+ -+ if (!block->has_bid) -+ { -+ WARN("Missing blockinfo block id.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (block->blockinfo_bid == MODULE_BLOCK) -+ { -+ FIXME("Unhandled global abbreviation for module block.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ global_abbrev->block_id = block->blockinfo_bid; -+ -+ sm6->abbrevs[sm6->abbrev_count++] = global_abbrev; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_add_block_abbrev(struct sm6_parser *sm6) -+{ -+ struct dxil_block *block = sm6->current_block; -+ struct dxil_abbrev *abbrev; -+ enum vkd3d_result ret; -+ unsigned int count; -+ -+ if (block->id == BLOCKINFO_BLOCK) -+ return sm6_parser_add_global_abbrev(sm6); -+ -+ count = sm6_parser_read_vbr(sm6, 5); -+ if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, block->abbrev_count + 1, sizeof(*block->abbrevs)) -+ || !(abbrev = vkd3d_malloc(sizeof(*abbrev) + count * sizeof(abbrev->operands[0])))) -+ { -+ ERR("Failed to allocate block abbreviation.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = dxil_abbrev_init(abbrev, count, sm6)) < 0) -+ { -+ vkd3d_free(abbrev); -+ return ret; -+ } -+ -+ 
block->abbrevs[block->abbrev_count++] = abbrev; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_read_abbrev_record(struct sm6_parser *sm6, unsigned int abbrev_id) -+{ -+ enum vkd3d_result ret = VKD3D_ERROR_INVALID_SHADER; -+ struct dxil_block *block = sm6->current_block; -+ struct dxil_record *temp, *record; -+ unsigned int i, count, array_len; -+ struct dxil_abbrev *abbrev; -+ uint64_t code; -+ -+ if (abbrev_id >= block->abbrev_count) -+ { -+ WARN("Invalid abbreviation id %u.\n", abbrev_id); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ abbrev = block->abbrevs[abbrev_id]; -+ if (!(count = abbrev->count)) -+ return VKD3D_OK; -+ if (count == 1 && abbrev->is_array) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ /* First operand is the record code. The array is included in the count, but will be done separately. */ -+ count -= abbrev->is_array + 1; -+ if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) -+ { -+ ERR("Failed to allocate record with %u operands.\n", count); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (!abbrev->operands[0].read_operand(sm6, abbrev->operands[0].context, &code)) -+ goto fail; -+ if (code > UINT_MAX) -+ FIXME("Truncating 64-bit record code %#"PRIx64".\n", code); -+ record->code = code; -+ -+ for (i = 0; i < count; ++i) -+ if (!abbrev->operands[i + 1].read_operand(sm6, abbrev->operands[i + 1].context, &record->operands[i])) -+ goto fail; -+ record->operand_count = count; -+ -+ /* An array can occur only as the last operand. 
*/ -+ if (abbrev->is_array) -+ { -+ array_len = sm6_parser_read_vbr(sm6, 6); -+ if (!(temp = vkd3d_realloc(record, sizeof(*record) + (count + array_len) * sizeof(record->operands[0])))) -+ { -+ ERR("Failed to allocate record with %u operands.\n", count + array_len); -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto fail; -+ } -+ record = temp; -+ -+ for (i = 0; i < array_len; ++i) -+ { -+ if (!abbrev->operands[count + 1].read_operand(sm6, abbrev->operands[count + 1].context, -+ &record->operands[count + i])) -+ { -+ goto fail; -+ } -+ } -+ record->operand_count += array_len; -+ } -+ -+ if ((ret = dxil_block_add_record(block, record)) < 0) -+ goto fail; -+ -+ return VKD3D_OK; -+ -+fail: -+ vkd3d_free(record); -+ return ret; -+} -+ -+static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, -+ struct sm6_parser *sm6); -+ -+static enum vkd3d_result dxil_block_read(struct dxil_block *parent, struct sm6_parser *sm6) -+{ -+ unsigned int reserve = (parent->id == MODULE_BLOCK) ? 
12 : 2; -+ struct dxil_block *block; -+ enum vkd3d_result ret; -+ -+ sm6->current_block = parent; -+ -+ do -+ { -+ unsigned int abbrev_id = sm6_parser_read_bits(sm6, parent->abbrev_len); -+ -+ switch (abbrev_id) -+ { -+ case END_BLOCK: -+ sm6_parser_align_32(sm6); -+ return VKD3D_OK; -+ -+ case ENTER_SUBBLOCK: -+ if (parent->id != MODULE_BLOCK && parent->id != FUNCTION_BLOCK) -+ { -+ WARN("Invalid subblock parent id %u.\n", parent->id); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!vkd3d_array_reserve((void **)&parent->child_blocks, &parent->child_block_capacity, -+ max(reserve, parent->child_block_count + 1), sizeof(*parent->child_blocks)) -+ || !(block = vkd3d_calloc(1, sizeof(*block)))) -+ { -+ ERR("Failed to allocate block.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = dxil_block_init(block, parent, sm6)) < 0) -+ { -+ vkd3d_free(block); -+ return ret; -+ } -+ -+ parent->child_blocks[parent->child_block_count++] = block; -+ sm6->current_block = parent; -+ break; -+ -+ case DEFINE_ABBREV: -+ if ((ret = sm6_parser_add_block_abbrev(sm6)) < 0) -+ return ret; -+ break; -+ -+ case UNABBREV_RECORD: -+ if ((ret = sm6_parser_read_unabbrev_record(sm6)) < 0) -+ { -+ WARN("Failed to read unabbreviated record.\n"); -+ return ret; -+ } -+ break; -+ -+ default: -+ if ((ret = sm6_parser_read_abbrev_record(sm6, abbrev_id - 4)) < 0) -+ { -+ WARN("Failed to read abbreviated record.\n"); -+ return ret; -+ } -+ break; -+ } -+ } while (!sm6->p.failed); -+ -+ return VKD3D_ERROR_INVALID_SHADER; -+} -+ -+static size_t sm6_parser_compute_global_abbrev_count_for_block_id(struct sm6_parser *sm6, -+ unsigned int block_id) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < sm6->abbrev_count; ++i) -+ count += sm6->abbrevs[i]->block_id == block_id; -+ -+ return count; -+} -+ -+static void dxil_block_destroy(struct dxil_block *block) -+{ -+ size_t i; -+ -+ for (i = 0; i < block->record_count; ++i) -+ vkd3d_free(block->records[i]); -+ vkd3d_free(block->records); 
-+ -+ for (i = 0; i < block->child_block_count; ++i) -+ { -+ dxil_block_destroy(block->child_blocks[i]); -+ vkd3d_free(block->child_blocks[i]); -+ } -+ vkd3d_free(block->child_blocks); -+ -+ block->records = NULL; -+ block->record_count = 0; -+ block->child_blocks = NULL; -+ block->child_block_count = 0; -+} -+ -+static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, -+ struct sm6_parser *sm6) -+{ -+ size_t i, abbrev_count = 0; -+ enum vkd3d_result ret; -+ -+ block->parent = parent; -+ block->level = parent ? parent->level + 1 : 0; -+ block->id = sm6_parser_read_vbr(sm6, 8); -+ block->abbrev_len = sm6_parser_read_vbr(sm6, 4); -+ sm6_parser_align_32(sm6); -+ block->length = sm6_parser_read_uint32(sm6); -+ block->start = sm6->ptr - sm6->start; -+ -+ if (sm6->p.failed) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if ((block->abbrev_count = sm6_parser_compute_global_abbrev_count_for_block_id(sm6, block->id))) -+ { -+ if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, -+ block->abbrev_count, sizeof(*block->abbrevs))) -+ { -+ ERR("Failed to allocate block abbreviations.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ for (i = 0; i < sm6->abbrev_count; ++i) -+ if (sm6->abbrevs[i]->block_id == block->id) -+ block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; -+ -+ assert(abbrev_count == block->abbrev_count); -+ } -+ -+ if ((ret = dxil_block_read(block, sm6)) < 0) -+ dxil_block_destroy(block); -+ -+ for (i = abbrev_count; i < block->abbrev_count; ++i) -+ vkd3d_free(block->abbrevs[i]); -+ vkd3d_free(block->abbrevs); -+ block->abbrevs = NULL; -+ block->abbrev_count = 0; -+ -+ return ret; -+} -+ -+static size_t dxil_block_compute_function_count(const struct dxil_block *root) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < root->child_block_count; ++i) -+ count += root->child_blocks[i]->id == FUNCTION_BLOCK; -+ -+ return count; -+} -+ -+static size_t 
dxil_block_compute_module_decl_count(const struct dxil_block *block) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < block->record_count; ++i) -+ count += block->records[i]->code == MODULE_CODE_FUNCTION; -+ return count; -+} -+ -+static size_t dxil_block_compute_constants_count(const struct dxil_block *block) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < block->record_count; ++i) -+ count += block->records[i]->code != CST_CODE_SETTYPE; -+ return count; -+} -+ -+static void dxil_global_abbrevs_cleanup(struct dxil_global_abbrev **abbrevs, size_t count) -+{ -+ size_t i; -+ -+ for (i = 0; i < count; ++i) -+ vkd3d_free(abbrevs[i]); -+ vkd3d_free(abbrevs); -+} -+ -+static const struct dxil_block *sm6_parser_get_level_one_block(const struct sm6_parser *sm6, -+ enum bitcode_block_id id, bool *is_unique) -+{ -+ const struct dxil_block *block, *found = NULL; -+ size_t i; -+ -+ for (i = 0, *is_unique = true; i < sm6->root_block.child_block_count; ++i) -+ { -+ block = sm6->root_block.child_blocks[i]; -+ if (block->id != id) -+ continue; -+ -+ if (!found) -+ found = block; -+ else -+ *is_unique = false; -+ } -+ -+ return found; -+} -+ -+static char *dxil_record_to_string(const struct dxil_record *record, unsigned int offset) -+{ -+ unsigned int i; -+ char *str; -+ -+ assert(offset <= record->operand_count); -+ if (!(str = vkd3d_calloc(record->operand_count - offset + 1, 1))) -+ return NULL; -+ -+ for (i = offset; i < record->operand_count; ++i) -+ str[i - offset] = record->operands[i]; -+ -+ return str; -+} -+ -+static bool dxil_record_validate_operand_min_count(const struct dxil_record *record, unsigned int min_count, -+ struct sm6_parser *sm6) -+{ -+ if (record->operand_count >= min_count) -+ return true; -+ -+ WARN("Invalid operand count %u for code %u.\n", record->operand_count, record->code); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "Invalid operand count %u for record code %u.", record->operand_count, 
record->code); -+ return false; -+} -+ -+static void dxil_record_validate_operand_max_count(const struct dxil_record *record, unsigned int max_count, -+ struct sm6_parser *sm6) -+{ -+ if (record->operand_count <= max_count) -+ return; -+ -+ WARN("Ignoring %u extra operands for code %u.\n", record->operand_count - max_count, record->code); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %u extra operands for record code %u.", record->operand_count - max_count, record->code); -+} -+ -+static bool dxil_record_validate_operand_count(const struct dxil_record *record, unsigned int min_count, -+ unsigned int max_count, struct sm6_parser *sm6) -+{ -+ dxil_record_validate_operand_max_count(record, max_count, sm6); -+ return dxil_record_validate_operand_min_count(record, min_count, sm6); -+} -+ -+static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) -+{ -+ const struct dxil_record *record; -+ size_t i, type_count, type_index; -+ const struct dxil_block *block; -+ char *struct_name = NULL; -+ unsigned int j, count; -+ struct sm6_type *type; -+ uint64_t type_id; -+ bool is_unique; -+ -+ sm6->p.location.line = 0; -+ sm6->p.location.column = 0; -+ -+ if (!(block = sm6_parser_get_level_one_block(sm6, TYPE_BLOCK, &is_unique))) -+ { -+ WARN("No type definitions found.\n"); -+ return VKD3D_OK; -+ } -+ if (!is_unique) -+ WARN("Ignoring invalid extra type table(s).\n"); -+ -+ sm6->p.location.line = block->id; -+ -+ type_count = 0; -+ for (i = 0; i < block->record_count; ++i) -+ type_count += block->records[i]->code != TYPE_CODE_NUMENTRY && block->records[i]->code != TYPE_CODE_STRUCT_NAME; -+ -+ /* The type array must not be relocated. 
*/ -+ if (!(sm6->types = vkd3d_calloc(type_count, sizeof(*sm6->types)))) -+ { -+ ERR("Failed to allocate type array.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ for (i = 0; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ -+ type = &sm6->types[sm6->type_count]; -+ type_index = sm6->type_count; -+ -+ switch (record->code) -+ { -+ case TYPE_CODE_ARRAY: -+ case TYPE_CODE_VECTOR: -+ if (!dxil_record_validate_operand_count(record, 2, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ type->class = record->code == TYPE_CODE_ARRAY ? TYPE_CLASS_ARRAY : TYPE_CLASS_VECTOR; -+ -+ if (!(type->u.array.count = record->operands[0])) -+ { -+ TRACE("Setting unbounded for type %zu.\n", type_index); -+ type->u.array.count = UINT_MAX; -+ } -+ -+ if ((type_id = record->operands[1]) >= type_count) -+ { -+ WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.array.elem_type = &sm6->types[type_id]; -+ break; -+ -+ case TYPE_CODE_DOUBLE: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_FLOAT; -+ type->u.width = 64; -+ break; -+ -+ case TYPE_CODE_FLOAT: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_FLOAT; -+ type->u.width = 32; -+ break; -+ -+ case TYPE_CODE_FUNCTION: -+ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ if (record->operands[0]) -+ FIXME("Unhandled vararg function type %zu.\n", type_index); -+ -+ type->class = TYPE_CLASS_FUNCTION; -+ -+ if ((type_id = record->operands[1]) >= type_count) -+ { -+ WARN("Invalid return type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ count = record->operand_count - 2; -+ if (vkd3d_object_range_overflow(sizeof(type->u.function), count, sizeof(type->u.function->param_types[0])) -+ || !(type->u.function = 
vkd3d_malloc(offsetof(struct sm6_function_info, param_types[count])))) -+ { -+ ERR("Failed to allocate function parameter types.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ type->u.function->ret_type = &sm6->types[type_id]; -+ type->u.function->param_count = count; -+ for (j = 0; j < count; ++j) -+ { -+ if ((type_id = record->operands[j + 2]) >= type_count) -+ { -+ WARN("Invalid parameter type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ vkd3d_free(type->u.function); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.function->param_types[j] = &sm6->types[type_id]; -+ } -+ break; -+ -+ case TYPE_CODE_HALF: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_FLOAT; -+ type->u.width = 16; -+ break; -+ -+ case TYPE_CODE_INTEGER: -+ { -+ uint64_t width; -+ -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ type->class = TYPE_CLASS_INTEGER; -+ -+ switch ((width = record->operands[0])) -+ { -+ case 1: -+ case 8: -+ case 16: -+ case 32: -+ case 64: -+ break; -+ default: -+ WARN("Invalid integer width %"PRIu64" for type %zu.\n", width, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.width = width; -+ break; -+ } -+ -+ case TYPE_CODE_LABEL: -+ type->class = TYPE_CLASS_LABEL; -+ break; -+ -+ case TYPE_CODE_METADATA: -+ type->class = TYPE_CLASS_METADATA; -+ break; -+ -+ case TYPE_CODE_NUMENTRY: -+ continue; -+ -+ case TYPE_CODE_POINTER: -+ if (!dxil_record_validate_operand_count(record, 1, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ type->class = TYPE_CLASS_POINTER; -+ -+ if ((type_id = record->operands[0]) >= type_count) -+ { -+ WARN("Invalid pointee type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.pointer.type = &sm6->types[type_id]; -+ type->u.pointer.addr_space = (record->operand_count > 1) ? 
record->operands[1] : ADDRESS_SPACE_DEFAULT; -+ break; -+ -+ case TYPE_CODE_STRUCT_ANON: -+ case TYPE_CODE_STRUCT_NAMED: -+ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ if (record->code == TYPE_CODE_STRUCT_NAMED && !struct_name) -+ { -+ WARN("Missing struct name before struct type %zu.\n", type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ type->class = TYPE_CLASS_STRUCT; -+ -+ count = record->operand_count - 1; -+ if (vkd3d_object_range_overflow(sizeof(type->u.struc), count, sizeof(type->u.struc->elem_types[0])) -+ || !(type->u.struc = vkd3d_malloc(offsetof(struct sm6_struct_info, elem_types[count])))) -+ { -+ ERR("Failed to allocate struct element types.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (record->operands[0]) -+ FIXME("Ignoring struct packed attribute.\n"); -+ -+ type->u.struc->elem_count = count; -+ for (j = 0; j < count; ++j) -+ { -+ if ((type_id = record->operands[j + 1]) >= type_count) -+ { -+ WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ vkd3d_free(type->u.struc); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.struc->elem_types[j] = &sm6->types[type_id]; -+ } -+ -+ if (record->code == TYPE_CODE_STRUCT_ANON) -+ { -+ type->u.struc->name = NULL; -+ break; -+ } -+ -+ type->u.struc->name = struct_name; -+ struct_name = NULL; -+ break; -+ -+ case TYPE_CODE_STRUCT_NAME: -+ if (!(struct_name = dxil_record_to_string(record, 0))) -+ { -+ ERR("Failed to allocate struct name.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ if (!struct_name[0]) -+ WARN("Struct name is empty for type %zu.\n", type_index); -+ continue; -+ -+ case TYPE_CODE_VOID: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_VOID; -+ break; -+ -+ default: -+ FIXME("Unhandled type %u at index %zu.\n", record->code, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ ++sm6->type_count; -+ } -+ -+ assert(sm6->type_count == 
type_count); -+ -+ if (struct_name) -+ { -+ WARN("Unused struct name %s.\n", struct_name); -+ vkd3d_free(struct_name); -+ } -+ -+ return VKD3D_OK; -+} -+ -+static inline bool sm6_type_is_void(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_VOID; -+} -+ -+static inline bool sm6_type_is_integer(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_INTEGER; -+} -+ -+static inline bool sm6_type_is_floating_point(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_FLOAT; -+} -+ -+static inline bool sm6_type_is_numeric(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_INTEGER || type->class == TYPE_CLASS_FLOAT; -+} -+ -+static inline bool sm6_type_is_pointer(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_POINTER; -+} -+ -+static bool sm6_type_is_numeric_aggregate(const struct sm6_type *type) -+{ -+ unsigned int i; -+ -+ switch (type->class) -+ { -+ case TYPE_CLASS_ARRAY: -+ case TYPE_CLASS_VECTOR: -+ return sm6_type_is_numeric(type->u.array.elem_type); -+ -+ case TYPE_CLASS_STRUCT: -+ /* Do not handle nested structs. Support can be added if they show up. 
*/ -+ for (i = 0; i < type->u.struc->elem_count; ++i) -+ if (!sm6_type_is_numeric(type->u.struc->elem_types[i])) -+ return false; -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+static inline bool sm6_type_is_struct(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_STRUCT; -+} -+ -+static inline bool sm6_type_is_function(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_FUNCTION; -+} -+ -+static inline bool sm6_type_is_function_pointer(const struct sm6_type *type) -+{ -+ return sm6_type_is_pointer(type) && sm6_type_is_function(type->u.pointer.type); -+} -+ -+static inline bool sm6_type_is_handle(const struct sm6_type *type) -+{ -+ return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.Handle"); -+} -+ -+static inline const struct sm6_type *sm6_type_get_element_type(const struct sm6_type *type) -+{ -+ return (type->class == TYPE_CLASS_ARRAY || type->class == TYPE_CLASS_VECTOR) ? type->u.array.elem_type : type; -+} -+ -+static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type *type, -+ enum bitcode_address_space addr_space, struct sm6_parser *sm6) -+{ -+ size_t i, start = type - sm6->types; -+ const struct sm6_type *pointer_type; -+ -+ /* DXC seems usually to place the pointer type immediately after its pointee. 
*/ -+ for (i = (start + 1) % sm6->type_count; i != start; i = (i + 1) % sm6->type_count) -+ { -+ pointer_type = &sm6->types[i]; -+ if (sm6_type_is_pointer(pointer_type) && pointer_type->u.pointer.type == type -+ && pointer_type->u.pointer.addr_space == addr_space) -+ return pointer_type; -+ } -+ -+ return NULL; -+} -+ -+static const struct sm6_type *sm6_parser_get_type(struct sm6_parser *sm6, uint64_t type_id) -+{ -+ if (type_id >= sm6->type_count) -+ { -+ WARN("Invalid type index %"PRIu64" at %zu.\n", type_id, sm6->value_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID, -+ "DXIL type id %"PRIu64" is invalid.", type_id); -+ return NULL; -+ } -+ return &sm6->types[type_id]; -+} -+ -+static int global_symbol_compare(const void *a, const void *b) -+{ -+ return vkd3d_u32_compare(((const struct sm6_symbol *)a)->id, ((const struct sm6_symbol *)b)->id); -+} -+ -+static enum vkd3d_result sm6_parser_symtab_init(struct sm6_parser *sm6) -+{ -+ const struct dxil_record *record; -+ const struct dxil_block *block; -+ struct sm6_symbol *symbol; -+ size_t i, count; -+ bool is_unique; -+ -+ sm6->p.location.line = 0; -+ sm6->p.location.column = 0; -+ -+ if (!(block = sm6_parser_get_level_one_block(sm6, VALUE_SYMTAB_BLOCK, &is_unique))) -+ { -+ /* There should always be at least one symbol: the name of the entry point function. 
*/ -+ WARN("No value symtab block found.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!is_unique) -+ FIXME("Ignoring extra value symtab block(s).\n"); -+ -+ sm6->p.location.line = block->id; -+ -+ for (i = 0, count = 0; i < block->record_count; ++i) -+ count += block->records[i]->code == VST_CODE_ENTRY; -+ -+ if (!(sm6->global_symbols = vkd3d_calloc(count, sizeof(*sm6->global_symbols)))) -+ { -+ ERR("Failed to allocate global symbols.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ for (i = 0; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ -+ if (record->code != VST_CODE_ENTRY) -+ { -+ FIXME("Unhandled symtab code %u.\n", record->code); -+ continue; -+ } -+ if (!dxil_record_validate_operand_min_count(record, 1, sm6)) -+ continue; -+ -+ symbol = &sm6->global_symbols[sm6->global_symbol_count]; -+ symbol->id = record->operands[0]; -+ if (!(symbol->name = dxil_record_to_string(record, 1))) -+ { -+ ERR("Failed to allocate symbol name.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ ++sm6->global_symbol_count; -+ } -+ -+ sm6->p.location.column = block->record_count; -+ -+ qsort(sm6->global_symbols, sm6->global_symbol_count, sizeof(*sm6->global_symbols), global_symbol_compare); -+ for (i = 1; i < sm6->global_symbol_count; ++i) -+ { -+ if (sm6->global_symbols[i].id == sm6->global_symbols[i - 1].id) -+ { -+ WARN("Invalid duplicate symbol id %u.\n", sm6->global_symbols[i].id); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+static const char *sm6_parser_get_global_symbol_name(const struct sm6_parser *sm6, size_t id) -+{ -+ size_t i, start; -+ -+ /* id == array index is normally true */ -+ i = start = id % sm6->global_symbol_count; -+ do -+ { -+ if (sm6->global_symbols[i].id == id) -+ return sm6->global_symbols[i].name; -+ i = (i + 1) % sm6->global_symbol_count; -+ } while (i != start); -+ -+ return NULL; -+} -+ -+static inline bool sm6_value_is_dx_intrinsic_dcl(const struct 
sm6_value *fn) -+{ -+ assert(fn->value_type == VALUE_TYPE_FUNCTION); -+ return fn->u.function.is_prototype && !strncmp(fn->u.function.name, "dx.op.", 6); -+} -+ -+static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_parser *sm6) -+{ -+ assert(sm6->value_count < sm6->value_capacity); -+ return &sm6->values[sm6->value_count]; -+} -+ -+static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type *type) -+{ -+ if (type->class == TYPE_CLASS_INTEGER) -+ { -+ switch (type->u.width) -+ { -+ case 8: -+ return VKD3D_DATA_UINT8; -+ case 32: -+ return VKD3D_DATA_UINT; -+ default: -+ FIXME("Unhandled width %u.\n", type->u.width); -+ return VKD3D_DATA_UINT; -+ } -+ } -+ else if (type->class == TYPE_CLASS_FLOAT) -+ { -+ switch (type->u.width) -+ { -+ case 32: -+ return VKD3D_DATA_FLOAT; -+ case 64: -+ return VKD3D_DATA_DOUBLE; -+ default: -+ FIXME("Unhandled width %u.\n", type->u.width); -+ return VKD3D_DATA_FLOAT; -+ } -+ } -+ -+ FIXME("Unhandled type %u.\n", type->class); -+ return VKD3D_DATA_UINT; -+} -+ -+/* Recurse through the block tree while maintaining a current value count. The current -+ * count is the sum of the global count plus all declarations within the current function. -+ * Store into value_capacity the highest count seen. */ -+static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, -+ const struct dxil_block *block, size_t value_count) -+{ -+ size_t i, old_value_count = value_count; -+ -+ if (block->id == MODULE_BLOCK) -+ value_count = size_add_with_overflow_check(value_count, dxil_block_compute_module_decl_count(block)); -+ -+ for (i = 0; i < block->child_block_count; ++i) -+ value_count = sm6_parser_compute_max_value_count(sm6, block->child_blocks[i], value_count); -+ -+ switch (block->id) -+ { -+ case CONSTANTS_BLOCK: -+ /* Function local constants are contained in a child block of the function block. 
*/ -+ value_count = size_add_with_overflow_check(value_count, dxil_block_compute_constants_count(block)); -+ break; -+ case FUNCTION_BLOCK: -+ /* A function must start with a block count, which emits no value. This formula is likely to -+ * overestimate the value count somewhat, but this should be no problem. */ -+ value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); -+ sm6->value_capacity = max(sm6->value_capacity, value_count); -+ /* The value count returns to its previous value after handling a function. */ -+ if (value_count < SIZE_MAX) -+ value_count = old_value_count; -+ break; -+ default: -+ break; -+ } -+ -+ return value_count; -+} -+ -+static bool sm6_parser_declare_function(struct sm6_parser *sm6, const struct dxil_record *record) -+{ -+ const unsigned int max_count = 15; -+ const struct sm6_type *ret_type; -+ struct sm6_value *fn; -+ unsigned int i, j; -+ -+ if (!dxil_record_validate_operand_count(record, 8, max_count, sm6)) -+ return false; -+ -+ fn = sm6_parser_get_current_value(sm6); -+ fn->value_type = VALUE_TYPE_FUNCTION; -+ if (!(fn->u.function.name = sm6_parser_get_global_symbol_name(sm6, sm6->value_count))) -+ { -+ WARN("Missing symbol name for function %zu.\n", sm6->value_count); -+ fn->u.function.name = ""; -+ } -+ -+ if (!(fn->type = sm6_parser_get_type(sm6, record->operands[0]))) -+ return false; -+ if (!sm6_type_is_function(fn->type)) -+ { -+ WARN("Type is not a function.\n"); -+ return false; -+ } -+ ret_type = fn->type->u.function->ret_type; -+ -+ if (!(fn->type = sm6_type_get_pointer_to_type(fn->type, ADDRESS_SPACE_DEFAULT, sm6))) -+ { -+ WARN("Failed to get pointer type for type %u.\n", fn->type->class); -+ return false; -+ } -+ -+ if (record->operands[1]) -+ WARN("Ignoring calling convention %#"PRIx64".\n", record->operands[1]); -+ -+ fn->u.function.is_prototype = !!record->operands[2]; -+ -+ if (record->operands[3]) -+ WARN("Ignoring linkage %#"PRIx64".\n", record->operands[3]); -+ -+ if 
(record->operands[4] > UINT_MAX) -+ WARN("Invalid attributes id %#"PRIx64".\n", record->operands[4]); -+ /* 1-based index. */ -+ if ((fn->u.function.attribs_id = record->operands[4])) -+ TRACE("Ignoring function attributes.\n"); -+ -+ /* These always seem to be zero. */ -+ for (i = 5, j = 0; i < min(record->operand_count, max_count); ++i) -+ j += !!record->operands[i]; -+ if (j) -+ WARN("Ignoring %u operands.\n", j); -+ -+ if (sm6_value_is_dx_intrinsic_dcl(fn) && !sm6_type_is_void(ret_type) && !sm6_type_is_numeric(ret_type) -+ && !sm6_type_is_numeric_aggregate(ret_type) && !sm6_type_is_handle(ret_type)) -+ { -+ WARN("Unexpected return type for dx intrinsic function '%s'.\n", fn->u.function.name); -+ } -+ -+ ++sm6->value_count; -+ -+ return true; -+} -+ -+static inline uint64_t decode_rotated_signed_value(uint64_t value) -+{ -+ if (value != 1) -+ { -+ bool neg = value & 1; -+ value >>= 1; -+ return neg ? -value : value; -+ } -+ return value << 63; -+} -+ -+static inline float bitcast_uint64_to_float(uint64_t value) -+{ -+ union -+ { -+ uint32_t uint32_value; -+ float float_value; -+ } u; -+ -+ u.uint32_value = value; -+ return u.float_value; -+} -+ -+static inline double bitcast_uint64_to_double(uint64_t value) -+{ -+ union -+ { -+ uint64_t uint64_value; -+ double double_value; -+ } u; -+ -+ u.uint64_value = value; -+ return u.double_value; -+} -+ -+static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) -+{ -+ enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; -+ const struct sm6_type *type, *elem_type; -+ enum vkd3d_data_type reg_data_type; -+ const struct dxil_record *record; -+ struct sm6_value *dst; -+ size_t i, value_idx; -+ uint64_t value; -+ -+ for (i = 0, type = NULL; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ value_idx = sm6->value_count; -+ -+ if (record->code == CST_CODE_SETTYPE) -+ { -+ if (!dxil_record_validate_operand_count(record, 1, 
1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if (!(type = sm6_parser_get_type(sm6, record->operands[0]))) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ elem_type = sm6_type_get_element_type(type); -+ if (sm6_type_is_numeric(elem_type)) -+ { -+ reg_data_type = vkd3d_data_type_from_sm6_type(elem_type); -+ reg_type = elem_type->u.width > 32 ? VKD3DSPR_IMMCONST64 : VKD3DSPR_IMMCONST; -+ } -+ else -+ { -+ reg_data_type = VKD3D_DATA_UNUSED; -+ reg_type = VKD3DSPR_INVALID; -+ } -+ -+ if (i == block->record_count - 1) -+ WARN("Unused SETTYPE record.\n"); -+ -+ continue; -+ } -+ -+ if (!type) -+ { -+ WARN("Constant record %zu has no type.\n", value_idx); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ dst = sm6_parser_get_current_value(sm6); -+ dst->type = type; -+ dst->value_type = VALUE_TYPE_REG; -+ dst->u.reg.type = reg_type; -+ dst->u.reg.immconst_type = VKD3D_IMMCONST_SCALAR; -+ dst->u.reg.data_type = reg_data_type; -+ -+ switch (record->code) -+ { -+ case CST_CODE_NULL: -+ /* Register constant data is already zero-filled. 
*/ -+ break; -+ -+ case CST_CODE_INTEGER: -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if (!sm6_type_is_integer(type)) -+ { -+ WARN("Invalid integer of non-integer type %u at constant idx %zu.\n", type->class, value_idx); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ value = decode_rotated_signed_value(record->operands[0]); -+ if (type->u.width <= 32) -+ dst->u.reg.u.immconst_uint[0] = value & ((1ull << type->u.width) - 1); -+ else -+ dst->u.reg.u.immconst_uint64[0] = value; -+ -+ break; -+ -+ case CST_CODE_FLOAT: -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if (!sm6_type_is_floating_point(type)) -+ { -+ WARN("Invalid float of non-fp type %u at constant idx %zu.\n", type->class, value_idx); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (type->u.width == 16) -+ FIXME("Half float type is not supported yet.\n"); -+ else if (type->u.width == 32) -+ dst->u.reg.u.immconst_float[0] = bitcast_uint64_to_float(record->operands[0]); -+ else if (type->u.width == 64) -+ dst->u.reg.u.immconst_double[0] = bitcast_uint64_to_double(record->operands[0]); -+ else -+ vkd3d_unreachable(); -+ -+ break; -+ -+ case CST_CODE_DATA: -+ WARN("Unhandled constant array.\n"); -+ break; -+ -+ default: -+ FIXME("Unhandled constant code %u.\n", record->code); -+ break; -+ } -+ -+ ++sm6->value_count; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) -+{ -+ const struct dxil_block *block = &sm6->root_block; -+ const struct dxil_record *record; -+ uint64_t version; -+ size_t i; -+ -+ sm6->p.location.line = block->id; -+ sm6->p.location.column = 0; -+ -+ for (i = 0; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ switch (record->code) -+ { -+ case MODULE_CODE_FUNCTION: -+ if (!sm6_parser_declare_function(sm6, record)) -+ { -+ vkd3d_shader_parser_error(&sm6->p, 
VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL, -+ "A DXIL function declaration is invalid."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ break; -+ -+ case MODULE_CODE_GLOBALVAR: -+ FIXME("Global variables are not implemented yet.\n"); -+ break; -+ -+ case MODULE_CODE_VERSION: -+ dxil_record_validate_operand_count(record, 1, 1, sm6); -+ if ((version = record->operands[0]) != 1) -+ { -+ FIXME("Unsupported format version %#"PRIx64".\n", version); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT, -+ "Bitcode format version %#"PRIx64" is unsupported.", version); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ break; -+ -+ default: -+ break; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) -+{ -+ size_t i, count = sm6->function_count; -+ -+ for (i = 0; i < sm6->value_count; ++i) -+ { -+ if (sm6_type_is_function_pointer(sm6->values[i].type) && !sm6->values[i].u.function.is_prototype && !count--) -+ break; -+ } -+ if (i == sm6->value_count) -+ return NULL; -+ -+ ++sm6->function_count; -+ return &sm6->values[i]; -+} -+ -+static struct sm6_block *sm6_block_create() -+{ -+ struct sm6_block *block = vkd3d_calloc(1, sizeof(*block)); -+ return block; -+} -+ -+static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record *record, -+ struct sm6_block *code_block, struct vkd3d_shader_instruction *ins) -+{ -+ if (!dxil_record_validate_operand_count(record, 0, 1, sm6)) -+ return; -+ -+ if (record->operand_count) -+ FIXME("Non-void return is not implemented.\n"); -+ -+ ins->handler_idx = VKD3DSIH_NOP; -+} -+ -+static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct dxil_block *block, -+ struct sm6_function *function) -+{ -+ struct vkd3d_shader_instruction *ins; -+ const struct dxil_record *record; -+ struct sm6_block *code_block; -+ struct sm6_value *dst; -+ size_t i, block_idx; -+ bool ret_found; -+ enum -+ { -+ 
RESULT_VALUE, -+ RESULT_TERMINATE, -+ } result_type; -+ -+ if (sm6->function_count) -+ { -+ FIXME("Multiple functions are not supported yet.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!(function->declaration = sm6_parser_next_function_definition(sm6))) -+ { -+ WARN("Failed to find definition to match function body.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (block->record_count < 2) -+ { -+ /* It should contain at least a block count and a RET instruction. */ -+ WARN("Invalid function block record count %zu.\n", block->record_count); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (block->records[0]->code != FUNC_CODE_DECLAREBLOCKS || !block->records[0]->operand_count -+ || block->records[0]->operands[0] > UINT_MAX) -+ { -+ WARN("Block count declaration not found or invalid.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!(function->block_count = block->records[0]->operands[0])) -+ { -+ WARN("Function contains no blocks.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (function->block_count > 1) -+ { -+ FIXME("Branched shaders are not supported yet.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!(function->blocks[0] = sm6_block_create())) -+ { -+ ERR("Failed to allocate code block.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ code_block = function->blocks[0]; -+ -+ for (i = 1, block_idx = 0, ret_found = false; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ -+ /* block->record_count - 1 is the instruction count, but some instructions -+ * can emit >1 IR instruction, so extra may be used. 
*/ -+ if (!vkd3d_array_reserve((void **)&code_block->instructions, &code_block->instruction_capacity, -+ max(code_block->instruction_count + 1, block->record_count), sizeof(*code_block->instructions))) -+ { -+ ERR("Failed to allocate instructions.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ ins = &code_block->instructions[code_block->instruction_count]; -+ ins->handler_idx = VKD3DSIH_INVALID; -+ -+ dst = sm6_parser_get_current_value(sm6); -+ dst->type = NULL; -+ dst->value_type = VALUE_TYPE_REG; -+ result_type = RESULT_VALUE; -+ -+ record = block->records[i]; -+ switch (record->code) -+ { -+ case FUNC_CODE_INST_RET: -+ sm6_parser_emit_ret(sm6, record, code_block, ins); -+ result_type = RESULT_TERMINATE; -+ ret_found = true; -+ break; -+ default: -+ FIXME("Unhandled dxil instruction %u.\n", record->code); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (result_type == RESULT_TERMINATE) -+ { -+ ++block_idx; -+ code_block = (block_idx < function->block_count) ? function->blocks[block_idx] : NULL; -+ } -+ if (code_block) -+ code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; -+ else -+ assert(ins->handler_idx == VKD3DSIH_NOP); -+ sm6->value_count += !!dst->type; -+ } -+ -+ if (!ret_found) -+ { -+ WARN("Function contains no RET instruction.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_module_init(struct sm6_parser *sm6, const struct dxil_block *block, -+ unsigned int level) -+{ -+ size_t i, old_value_count = sm6->value_count; -+ struct sm6_function *function; -+ enum vkd3d_result ret; -+ -+ for (i = 0; i < block->child_block_count; ++i) -+ { -+ if ((ret = sm6_parser_module_init(sm6, block->child_blocks[i], level + 1)) < 0) -+ return ret; -+ } -+ -+ sm6->p.location.line = block->id; -+ sm6->p.location.column = 0; -+ -+ switch (block->id) -+ { -+ case CONSTANTS_BLOCK: -+ return sm6_parser_constants_init(sm6, block); -+ -+ case FUNCTION_BLOCK: -+ function = 
&sm6->functions[sm6->function_count]; -+ if ((ret = sm6_parser_function_init(sm6, block, function)) < 0) -+ return ret; -+ /* The value index returns to its previous value after handling a function. It's usually nonzero -+ * at the start because of global constants/variables/function declarations. Function constants -+ * occur in a child block, so value_count is already saved before they are emitted. */ -+ memset(&sm6->values[old_value_count], 0, (sm6->value_count - old_value_count) * sizeof(*sm6->values)); -+ sm6->value_count = old_value_count; -+ break; -+ -+ case BLOCKINFO_BLOCK: -+ case MODULE_BLOCK: -+ case PARAMATTR_BLOCK: -+ case PARAMATTR_GROUP_BLOCK: -+ case VALUE_SYMTAB_BLOCK: -+ case METADATA_BLOCK: -+ case METADATA_ATTACHMENT_BLOCK: -+ case TYPE_BLOCK: -+ break; -+ -+ default: -+ FIXME("Unhandled block id %u.\n", block->id); -+ break; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static void sm6_type_table_cleanup(struct sm6_type *types, size_t count) -+{ -+ size_t i; -+ -+ if (!types) -+ return; -+ -+ for (i = 0; i < count; ++i) -+ { -+ switch (types[i].class) -+ { -+ case TYPE_CLASS_STRUCT: -+ vkd3d_free((void *)types[i].u.struc->name); -+ vkd3d_free(types[i].u.struc); -+ break; -+ case TYPE_CLASS_FUNCTION: -+ vkd3d_free(types[i].u.function); -+ break; -+ default: -+ break; -+ } -+ } -+ -+ vkd3d_free(types); -+} -+ -+static void sm6_symtab_cleanup(struct sm6_symbol *symbols, size_t count) -+{ -+ size_t i; -+ -+ for (i = 0; i < count; ++i) -+ vkd3d_free((void *)symbols[i].name); -+ vkd3d_free(symbols); -+} -+ -+static void sm6_block_destroy(struct sm6_block *block) -+{ -+ vkd3d_free(block->instructions); -+ vkd3d_free(block); -+} -+ -+static void sm6_functions_cleanup(struct sm6_function *functions, size_t count) -+{ -+ size_t i, j; -+ -+ for (i = 0; i < count; ++i) -+ { -+ for (j = 0; j < functions[i].block_count; ++j) -+ sm6_block_destroy(functions[i].blocks[j]); -+ } -+ vkd3d_free(functions); -+} -+ -+static void sm6_parser_destroy(struct vkd3d_shader_parser 
*parser) -+{ -+ struct sm6_parser *sm6 = sm6_parser(parser); -+ -+ dxil_block_destroy(&sm6->root_block); -+ dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); -+ shader_instruction_array_destroy(&parser->instructions); -+ sm6_type_table_cleanup(sm6->types, sm6->type_count); -+ sm6_symtab_cleanup(sm6->global_symbols, sm6->global_symbol_count); -+ sm6_functions_cleanup(sm6->functions, sm6->function_count); -+ vkd3d_free(sm6->values); -+ free_shader_desc(&parser->shader_desc); -+ vkd3d_free(sm6); -+} -+ -+static const struct vkd3d_shader_parser_ops sm6_parser_ops = -+{ -+ .parser_destroy = sm6_parser_destroy, -+}; -+ -+static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, -+ const char *source_name, struct vkd3d_shader_message_context *message_context) -+{ -+ const struct vkd3d_shader_location location = {.source_name = source_name}; -+ uint32_t version_token, dxil_version, token_count, magic; -+ unsigned int chunk_offset, chunk_size; -+ size_t count, length, function_count; -+ enum bitcode_block_abbreviation abbr; -+ struct vkd3d_shader_version version; -+ struct dxil_block *block; -+ enum vkd3d_result ret; -+ -+ count = byte_code_size / sizeof(*byte_code); -+ if (count < 6) -+ { -+ WARN("Invalid data size %zu.\n", byte_code_size); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE, -+ "DXIL chunk size %zu is smaller than the DXIL header size.", byte_code_size); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ version_token = byte_code[0]; -+ TRACE("Compiler version: 0x%08x.\n", version_token); -+ token_count = byte_code[1]; -+ TRACE("Token count: %u.\n", token_count); -+ -+ if (token_count < 6 || count < token_count) -+ { -+ WARN("Invalid token count %u (word count %zu).\n", token_count, count); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, -+ "DXIL chunk token count %#x is invalid (word count %zu).", 
token_count, count); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (byte_code[2] != TAG_DXIL) -+ WARN("Unknown magic number 0x%08x.\n", byte_code[2]); -+ -+ dxil_version = byte_code[3]; -+ if (dxil_version > 0x102) -+ WARN("Unknown DXIL version: 0x%08x.\n", dxil_version); -+ else -+ TRACE("DXIL version: 0x%08x.\n", dxil_version); -+ -+ chunk_offset = byte_code[4]; -+ if (chunk_offset < 16 || chunk_offset >= byte_code_size) -+ { -+ WARN("Invalid bitcode chunk offset %#x (data size %zu).\n", chunk_offset, byte_code_size); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET, -+ "DXIL bitcode chunk has invalid offset %#x (data size %#zx).", chunk_offset, byte_code_size); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ chunk_size = byte_code[5]; -+ if (chunk_size > byte_code_size - chunk_offset) -+ { -+ WARN("Invalid bitcode chunk size %#x (data size %zu, chunk offset %#x).\n", -+ chunk_size, byte_code_size, chunk_offset); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, -+ "DXIL bitcode chunk has invalid size %#x (data size %#zx, chunk offset %#x).", -+ chunk_size, byte_code_size, chunk_offset); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ sm6->start = (const uint32_t *)((const char*)&byte_code[2] + chunk_offset); -+ if ((magic = sm6->start[0]) != BITCODE_MAGIC) -+ { -+ WARN("Unknown magic number 0x%08x.\n", magic); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER, -+ "DXIL bitcode chunk magic number 0x%08x is not the expected 0x%08x.", magic, BITCODE_MAGIC); -+ } -+ -+ sm6->end = &sm6->start[(chunk_size + sizeof(*sm6->start) - 1) / sizeof(*sm6->start)]; -+ -+ if ((version.type = version_token >> 16) >= VKD3D_SHADER_TYPE_COUNT) -+ { -+ FIXME("Unknown shader type %#x.\n", version.type); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE, -+ "Unknown shader type %#x.", version.type); -+ } -+ -+ 
version.major = VKD3D_SM6_VERSION_MAJOR(version_token); -+ version.minor = VKD3D_SM6_VERSION_MINOR(version_token); -+ -+ if ((abbr = sm6->start[1] & 3) != ENTER_SUBBLOCK) -+ { -+ WARN("Initial block abbreviation %u is not ENTER_SUBBLOCK.\n", abbr); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, -+ "DXIL bitcode chunk has invalid initial block abbreviation %u.", abbr); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ /* Estimate instruction count to avoid reallocation in most shaders. */ -+ count = max(token_count, 400) - 400; -+ vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &sm6_parser_ops, -+ (count + (count >> 2)) / 2u + 10); -+ sm6->ptr = &sm6->start[1]; -+ sm6->bitpos = 2; -+ -+ block = &sm6->root_block; -+ if ((ret = dxil_block_init(block, NULL, sm6)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL bitcode chunk."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, -+ "DXIL bitcode chunk has invalid bitcode."); -+ else -+ vkd3d_unreachable(); -+ return ret; -+ } -+ -+ dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); -+ sm6->abbrevs = NULL; -+ sm6->abbrev_count = 0; -+ -+ length = sm6->ptr - sm6->start - block->start; -+ if (length != block->length) -+ { -+ WARN("Invalid block length %zu; expected %u.\n", length, block->length); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH, -+ "Root block ends with length %zu but indicated length is %u.", length, block->length); -+ } -+ if (sm6->ptr != sm6->end) -+ { -+ size_t expected_length = sm6->end - sm6->start; -+ length = sm6->ptr - sm6->start; -+ WARN("Invalid module length %zu; expected %zu.\n", length, expected_length); -+ vkd3d_shader_parser_warning(&sm6->p, 
VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH, -+ "Module ends with length %zu but indicated length is %zu.", length, expected_length); -+ } -+ -+ if ((ret = sm6_parser_type_table_init(sm6)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL type table."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE, -+ "DXIL type table is invalid."); -+ else -+ vkd3d_unreachable(); -+ return ret; -+ } -+ -+ if ((ret = sm6_parser_symtab_init(sm6)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL value symbol table."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB, -+ "DXIL value symbol table is invalid."); -+ else -+ vkd3d_unreachable(); -+ return ret; -+ } -+ -+ function_count = dxil_block_compute_function_count(&sm6->root_block); -+ if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) -+ { -+ ERR("Failed to allocate function array.\n"); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating DXIL function array."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (sm6_parser_compute_max_value_count(sm6, &sm6->root_block, 0) == SIZE_MAX) -+ { -+ WARN("Value array count overflowed.\n"); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "Overflow occurred in the DXIL module value count."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values)))) -+ { -+ ERR("Failed to allocate value array.\n"); -+ vkd3d_shader_error(message_context, &location, 
VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating DXIL value array."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = sm6_parser_globals_init(sm6)) < 0) -+ { -+ WARN("Failed to load global declarations.\n"); -+ return ret; -+ } -+ -+ if ((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL module."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "DXIL module is invalid."); -+ else -+ vkd3d_unreachable(); -+ return ret; -+ } -+ -+ dxil_block_destroy(&sm6->root_block); -+ -+ return VKD3D_OK; -+} -+ -+int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) -+{ -+ struct vkd3d_shader_desc *shader_desc; -+ uint32_t *byte_code = NULL; -+ struct sm6_parser *sm6; -+ int ret; -+ -+ if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6)))) -+ { -+ ERR("Failed to allocate parser.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ shader_desc = &sm6->p.shader_desc; -+ shader_desc->is_dxil = true; -+ if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, -+ shader_desc)) < 0) -+ { -+ WARN("Failed to extract shader, vkd3d result %d.\n", ret); -+ vkd3d_free(sm6); -+ return ret; -+ } -+ -+ sm6->p.shader_desc = *shader_desc; -+ shader_desc = &sm6->p.shader_desc; -+ -+ if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) -+ { -+ /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC -+ * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. 
*/ -+ if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) -+ ERR("Failed to allocate aligned chunk. Unaligned access will occur.\n"); -+ else -+ memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); -+ } -+ -+ ret = sm6_parser_init(sm6, byte_code ? byte_code : shader_desc->byte_code, shader_desc->byte_code_size, -+ compile_info->source_name, message_context); -+ vkd3d_free(byte_code); -+ -+ if (ret < 0) -+ { -+ WARN("Failed to initialise shader parser.\n"); -+ sm6_parser_destroy(&sm6->p); -+ return ret; -+ } -+ -+ *parser = &sm6->p; -+ -+ return ret; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index ba5bcfbfaf0..ab508502623 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -430,6 +430,51 @@ struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl - return type; - } - -+unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, -+ enum hlsl_regset regset, unsigned int index) -+{ -+ struct hlsl_type *next_type; -+ unsigned int offset = 0; -+ unsigned int idx; -+ -+ while (!type_is_single_component(type)) -+ { -+ next_type = type; -+ idx = traverse_path_from_component_index(ctx, &next_type, &index); -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ if (regset == HLSL_REGSET_NUMERIC) -+ offset += idx; -+ break; -+ -+ case HLSL_CLASS_STRUCT: -+ offset += type->e.record.fields[idx].reg_offset[regset]; -+ break; -+ -+ case HLSL_CLASS_ARRAY: -+ if (regset == HLSL_REGSET_NUMERIC) -+ offset += idx * align(type->e.array.type->reg_size[regset], 4); -+ else -+ offset += idx * type->e.array.type->reg_size[regset]; -+ break; -+ -+ case HLSL_CLASS_OBJECT: -+ assert(idx == 0); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ type = next_type; -+ } -+ -+ return offset; -+} -+ - static bool 
init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, - unsigned int path_len) - { -@@ -524,7 +569,9 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de - unsigned int i; - - assert(deref); -- assert(!deref->offset.node); -+ -+ if (deref->offset.node) -+ return deref->data_type; - - type = deref->var->data_type; - for (i = 0; i < deref->path_len; ++i) -@@ -626,6 +673,7 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba - type->e.array.type = basic_type; - type->dimx = basic_type->dimx; - type->dimy = basic_type->dimy; -+ type->sampler_dim = basic_type->sampler_dim; - hlsl_type_calculate_reg_size(ctx, type); - - list_add_tail(&ctx->types, &type->entry); -@@ -992,20 +1040,31 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem - struct vkd3d_string_buffer *string; - struct hlsl_ir_var *var; - static LONG counter; -- const char *name; - - if (!(string = hlsl_get_string_buffer(ctx))) - return NULL; - vkd3d_string_buffer_printf(string, "<%s-%u>", template, InterlockedIncrement(&counter)); -- if (!(name = hlsl_strdup(ctx, string->buffer))) -- { -- hlsl_release_string_buffer(ctx, string); -- return NULL; -- } -- var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); -+ var = hlsl_new_synthetic_var_named(ctx, string->buffer, type, loc, true); - hlsl_release_string_buffer(ctx, string); -+ return var; -+} -+ -+struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, -+ struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope) -+{ -+ struct hlsl_ir_var *var; -+ const char *name_copy; -+ -+ if (!(name_copy = hlsl_strdup(ctx, name))) -+ return NULL; -+ var = hlsl_new_var(ctx, name_copy, type, loc, NULL, 0, NULL); - if (var) -- list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); -+ { -+ if (dummy_scope) -+ list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); -+ else -+ 
list_add_tail(&ctx->globals->vars, &var->scope_entry); -+ } - return var; - } - -@@ -1432,7 +1491,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v - } - - struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, -- const struct vkd3d_shader_location *loc) -+ struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_jump *jump; - -@@ -1440,6 +1499,7 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type - return NULL; - init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); - jump->type = type; -+ hlsl_src_from_node(&jump->condition, condition); - return &jump->node; - } - -@@ -1585,9 +1645,9 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma - return dst; - } - --static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) -+static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_jump *src) - { -- return hlsl_new_jump(ctx, src->type, &src->node.loc); -+ return hlsl_new_jump(ctx, src->type, map_instr(map, src->condition.node), &src->node.loc); - } - - static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) -@@ -1728,7 +1788,7 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - return clone_index(ctx, map, hlsl_ir_index(instr)); - - case HLSL_IR_JUMP: -- return clone_jump(ctx, hlsl_ir_jump(instr)); -+ return clone_jump(ctx, map, hlsl_ir_jump(instr)); - - case HLSL_IR_LOAD: - return clone_load(ctx, map, hlsl_ir_load(instr)); -@@ -2065,6 +2125,31 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - } - } - -+struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, -+ unsigned int index) -+{ -+ struct hlsl_type *type = var->data_type, *current_type; -+ struct vkd3d_string_buffer *buffer; 
-+ unsigned int element_index; -+ -+ if (!(buffer = hlsl_get_string_buffer(ctx))) -+ return NULL; -+ -+ vkd3d_string_buffer_printf(buffer, "%s", var->name); -+ -+ while (!type_is_single_component(type)) -+ { -+ current_type = type; -+ element_index = traverse_path_from_component_index(ctx, &type, &index); -+ if (current_type->class == HLSL_CLASS_STRUCT) -+ vkd3d_string_buffer_printf(buffer, ".%s", current_type->e.record.fields[element_index].name); -+ else -+ vkd3d_string_buffer_printf(buffer, "[%u]", element_index); -+ } -+ -+ return buffer; -+} -+ - const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) - { - struct vkd3d_string_buffer *string; -@@ -2123,18 +2208,18 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) - { - static const char * const names[] = - { -- "HLSL_IR_CALL", -- "HLSL_IR_CONSTANT", -- "HLSL_IR_EXPR", -- "HLSL_IR_IF", -- "HLSL_IR_INDEX", -- "HLSL_IR_LOAD", -- "HLSL_IR_LOOP", -- "HLSL_IR_JUMP", -- "HLSL_IR_RESOURCE_LOAD", -- "HLSL_IR_RESOURCE_STORE", -- "HLSL_IR_STORE", -- "HLSL_IR_SWIZZLE", -+ [HLSL_IR_CALL ] = "HLSL_IR_CALL", -+ [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", -+ [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", -+ [HLSL_IR_IF ] = "HLSL_IR_IF", -+ [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", -+ [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", -+ [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", -+ [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", -+ [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", -+ [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", -+ [HLSL_IR_STORE ] = "HLSL_IR_STORE", -+ [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", - }; - - if (type >= ARRAY_SIZE(names)) -@@ -2146,10 +2231,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) - { - static const char * const names[] = - { -- "HLSL_IR_JUMP_BREAK", -- "HLSL_IR_JUMP_CONTINUE", -- "HLSL_IR_JUMP_DISCARD", -- "HLSL_IR_JUMP_RETURN", -+ [HLSL_IR_JUMP_BREAK] = "HLSL_IR_JUMP_BREAK", -+ [HLSL_IR_JUMP_CONTINUE] = "HLSL_IR_JUMP_CONTINUE", -+ [HLSL_IR_JUMP_DISCARD_NEG] = 
"HLSL_IR_JUMP_DISCARD_NEG", -+ [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", -+ [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", - }; - - assert(type < ARRAY_SIZE(names)); -@@ -2337,7 +2423,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP1_COS] = "cos", - [HLSL_OP1_COS_REDUCED] = "cos_reduced", - [HLSL_OP1_DSX] = "dsx", -+ [HLSL_OP1_DSX_COARSE] = "dsx_coarse", -+ [HLSL_OP1_DSX_FINE] = "dsx_fine", - [HLSL_OP1_DSY] = "dsy", -+ [HLSL_OP1_DSY_COARSE] = "dsy_coarse", -+ [HLSL_OP1_DSY_FINE] = "dsy_fine", - [HLSL_OP1_EXP2] = "exp2", - [HLSL_OP1_FRACT] = "fract", - [HLSL_OP1_LOG2] = "log2", -@@ -2418,8 +2508,12 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i - vkd3d_string_buffer_printf(buffer, "continue"); - break; - -- case HLSL_IR_JUMP_DISCARD: -- vkd3d_string_buffer_printf(buffer, "discard"); -+ case HLSL_IR_JUMP_DISCARD_NEG: -+ vkd3d_string_buffer_printf(buffer, "discard_neg"); -+ break; -+ -+ case HLSL_IR_JUMP_DISCARD_NZ: -+ vkd3d_string_buffer_printf(buffer, "discard_nz"); - break; - - case HLSL_IR_JUMP_RETURN: -@@ -2703,6 +2797,7 @@ static void free_ir_if(struct hlsl_ir_if *if_node) - - static void free_ir_jump(struct hlsl_ir_jump *jump) - { -+ hlsl_src_remove(&jump->condition); - vkd3d_free(jump); - } - -@@ -3127,8 +3222,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - - for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) - { -- unsigned int n_variants = 0; - const char *const *variants; -+ unsigned int n_variants; - - switch (bt) - { -@@ -3148,6 +3243,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - break; - - default: -+ n_variants = 0; -+ variants = NULL; - break; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index bce48e94b24..1a4b995abbf 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -422,6 +422,7 @@ struct hlsl_ir_var - uint32_t is_output_semantic : 1; - uint32_t 
is_uniform : 1; - uint32_t is_param : 1; -+ uint32_t is_separated_resource : 1; - }; - - /* Sized array of variables representing a function's parameters. */ -@@ -502,7 +503,11 @@ enum hlsl_ir_expr_op - HLSL_OP1_COS, - HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ - HLSL_OP1_DSX, -+ HLSL_OP1_DSX_COARSE, -+ HLSL_OP1_DSX_FINE, - HLSL_OP1_DSY, -+ HLSL_OP1_DSY_COARSE, -+ HLSL_OP1_DSY_FINE, - HLSL_OP1_EXP2, - HLSL_OP1_FLOOR, - HLSL_OP1_FRACT, -@@ -558,7 +563,8 @@ enum hlsl_ir_jump_type - { - HLSL_IR_JUMP_BREAK, - HLSL_IR_JUMP_CONTINUE, -- HLSL_IR_JUMP_DISCARD, -+ HLSL_IR_JUMP_DISCARD_NEG, -+ HLSL_IR_JUMP_DISCARD_NZ, - HLSL_IR_JUMP_RETURN, - }; - -@@ -566,6 +572,8 @@ struct hlsl_ir_jump - { - struct hlsl_ir_node node; - enum hlsl_ir_jump_type type; -+ /* Argument used for HLSL_IR_JUMP_DISCARD_NZ and HLSL_IR_JUMP_DISCARD_NEG. */ -+ struct hlsl_src condition; - }; - - struct hlsl_ir_swizzle -@@ -600,9 +608,11 @@ struct hlsl_deref - * components, within the pertaining regset), from the start of the variable, of the part - * referenced. - * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- -- * before writing the bytecode. */ -+ * before writing the bytecode. -+ * Since the type information cannot longer be retrieved from the offset alone, the type is -+ * stored in the data_type field. */ - struct hlsl_src offset; -- enum hlsl_regset offset_regset; -+ struct hlsl_type *data_type; - }; - - struct hlsl_ir_load -@@ -803,7 +813,11 @@ struct hlsl_ctx - * Only used for SM1 profiles. */ - struct hlsl_constant_defs - { -- struct hlsl_vec4 *values; -+ struct hlsl_constant_register -+ { -+ uint32_t index; -+ struct hlsl_vec4 value; -+ } *regs; - size_t count, size; - } constant_defs; - /* Number of temp. registers required for the shader to run, i.e. 
the largest temp register -@@ -1055,6 +1069,8 @@ const char *debug_hlsl_writemask(unsigned int writemask); - const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int count); - - struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type); -+struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, -+ unsigned int index); - struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); - const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); - -@@ -1120,7 +1136,7 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, -- enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc); -+ enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); - - void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); - -@@ -1132,6 +1148,8 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls - const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, -+ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc); - - struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); - struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, 
-@@ -1156,6 +1174,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned in - struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, - struct hlsl_type *type, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, -+ struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope); - struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, - unsigned int sample_count); - struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format); -@@ -1187,6 +1207,8 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type); - unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, enum hlsl_regset regset); - struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl_type *type, - unsigned int index); -+unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, -+ enum hlsl_regset regset, unsigned int index); - bool hlsl_type_is_row_major(const struct hlsl_type *type); - unsigned int hlsl_type_minor_size(const struct hlsl_type *type); - unsigned int hlsl_type_major_size(const struct hlsl_type *type); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 0e07fe578e1..6bf87f8f916 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -53,7 +53,7 @@ struct parse_initializer - { - struct hlsl_ir_node **args; - unsigned int args_count; -- struct list *instrs; -+ struct hlsl_block *instrs; - bool braces; - }; - -@@ -73,6 +73,10 @@ struct parse_variable_def - struct hlsl_semantic semantic; - struct hlsl_reg_reservation reg_reservation; - struct parse_initializer initializer; -+ -+ struct 
hlsl_type *basic_type; -+ unsigned int modifiers; -+ struct vkd3d_shader_location modifiers_loc; - }; - - struct parse_function -@@ -85,8 +89,8 @@ struct parse_function - - struct parse_if_body - { -- struct list *then_block; -- struct list *else_block; -+ struct hlsl_block *then_block; -+ struct hlsl_block *else_block; - }; - - enum parse_assign_op -@@ -129,9 +133,34 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "%s", s); - } - --static struct hlsl_ir_node *node_from_list(struct list *list) -+static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) - { -- return LIST_ENTRY(list_tail(list), struct hlsl_ir_node, entry); -+ return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); -+} -+ -+static struct list *block_to_list(struct hlsl_block *block) -+{ -+ /* This is a temporary hack to ease the transition from lists to blocks. -+ * It takes advantage of the fact that an allocated hlsl_block pointer is -+ * byte-compatible with an allocated list pointer. */ -+ return &block->instrs; -+} -+ -+static struct hlsl_block *list_to_block(struct list *list) -+{ -+ /* This is a temporary hack to ease the transition from lists to blocks. -+ * It takes advantage of the fact that an allocated hlsl_block pointer is -+ * byte-compatible with an allocated list pointer. 
*/ -+ return CONTAINING_RECORD(list, struct hlsl_block, instrs); -+} -+ -+static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) -+{ -+ struct hlsl_block *block; -+ -+ if ((block = hlsl_alloc(ctx, sizeof(*block)))) -+ hlsl_block_init(block); -+ return block; - } - - static struct list *make_empty_list(struct hlsl_ctx *ctx) -@@ -149,6 +178,12 @@ static void destroy_instr_list(struct list *list) - vkd3d_free(list); - } - -+static void destroy_block(struct hlsl_block *block) -+{ -+ hlsl_block_cleanup(block); -+ vkd3d_free(block); -+} -+ - static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct hlsl_type *src, - struct hlsl_type *dst) - { -@@ -273,9 +308,6 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ - return hlsl_types_are_componentwise_equal(ctx, src, dst); - } - --static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -- unsigned int comp, const struct vkd3d_shader_location *loc); -- - static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) - { -@@ -333,7 +365,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - - dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); - -- if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) -+ if (!(component_load = hlsl_add_load_component(ctx, instrs, node, src_idx, loc))) - return NULL; - - if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) -@@ -405,29 +437,29 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, - return modifiers | mod; - } - --static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) -+static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) - { - struct hlsl_ir_node *condition, 
*not, *iff, *jump; - struct hlsl_block then_block; - - /* E.g. "for (i = 0; ; ++i)". */ -- if (list_empty(cond_list)) -+ if (list_empty(&cond_block->instrs)) - return true; - -- condition = node_from_list(cond_list); -+ condition = node_from_block(cond_block); - if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) - return false; -- list_add_tail(cond_list, ¬->entry); -+ hlsl_block_add_instr(cond_block, not); - - hlsl_block_init(&then_block); - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &condition->loc))) - return false; - hlsl_block_add_instr(&then_block, jump); - - if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) - return false; -- list_add_tail(cond_list, &iff->entry); -+ hlsl_block_add_instr(cond_block, iff); - return true; - } - -@@ -454,10 +486,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att - return false; - } - --static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct list *init, struct list *cond, -- struct list *iter, struct list *body, const struct vkd3d_shader_location *loc) -+static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, -+ const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, -+ struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) - { -- struct hlsl_block body_block; - struct hlsl_ir_node *loop; - unsigned int i; - -@@ -476,7 +508,7 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const - } - else - { -- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); -+ hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); - } - } - else if (!strcmp(attr->name, 
"loop") -@@ -491,38 +523,34 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const - } - } - -- if (!init && !(init = make_empty_list(ctx))) -+ if (!init && !(init = make_empty_block(ctx))) - goto oom; - - if (!append_conditional_break(ctx, cond)) - goto oom; - -- hlsl_block_init(&body_block); -- -- if (type != LOOP_DO_WHILE) -- list_move_tail(&body_block.instrs, cond); -- -- list_move_tail(&body_block.instrs, body); -- - if (iter) -- list_move_tail(&body_block.instrs, iter); -+ hlsl_block_add_block(body, iter); - - if (type == LOOP_DO_WHILE) -- list_move_tail(&body_block.instrs, cond); -+ list_move_tail(&body->instrs, &cond->instrs); -+ else -+ list_move_head(&body->instrs, &cond->instrs); - -- if (!(loop = hlsl_new_loop(ctx, &body_block, loc))) -+ if (!(loop = hlsl_new_loop(ctx, body, loc))) - goto oom; -- list_add_tail(init, &loop->entry); -+ hlsl_block_add_instr(init, loop); - -- vkd3d_free(cond); -- vkd3d_free(body); -+ destroy_block(cond); -+ destroy_block(body); -+ destroy_block(iter); - return init; - - oom: -- destroy_instr_list(init); -- destroy_instr_list(cond); -- destroy_instr_list(iter); -- destroy_instr_list(body); -+ destroy_block(init); -+ destroy_block(cond); -+ destroy_block(iter); -+ destroy_block(body); - return NULL; - } - -@@ -539,7 +567,7 @@ static unsigned int initializer_size(const struct parse_initializer *initializer - - static void free_parse_initializer(struct parse_initializer *initializer) - { -- destroy_instr_list(initializer->instrs); -+ destroy_block(initializer->instrs); - vkd3d_free(initializer->args); - } - -@@ -625,7 +653,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod - return NULL; - } - --static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, -+static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) - { - struct hlsl_type *return_type = 
ctx->cur_function->return_type; -@@ -637,7 +665,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, - { - struct hlsl_ir_node *store; - -- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, loc))) -+ if (!(return_value = add_implicit_conversion(ctx, block_to_list(block), return_value, return_type, loc))) - return false; - - if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) -@@ -656,14 +684,14 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); - } - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc))) - return false; -- list_add_tail(instrs, &jump->entry); -+ hlsl_block_add_instr(block, jump); - - return true; - } - --static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -+struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - unsigned int comp, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *load, *store; -@@ -686,7 +714,7 @@ static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list - return load; - } - --static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, -+static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *record, - unsigned int idx, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *index, *c; -@@ -695,16 +723,16 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct - - if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) - return false; -- list_add_tail(instrs, &c->entry); -+ hlsl_block_add_instr(block, c); - - if (!(index = hlsl_new_index(ctx, record, c, 
loc))) - return false; -- list_add_tail(instrs, &index->entry); -+ hlsl_block_add_instr(block, index); - - return true; - } - --static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc); - -@@ -830,6 +858,16 @@ static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) - return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; - } - -+static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); -+} -+ -+static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return !shader_profile_version_ge(ctx, major, minor); -+} -+ - static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - struct hlsl_type *type, unsigned int modifiers, struct list *defs) - { -@@ -1020,7 +1058,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const - struct hlsl_reg_reservation reservation = {0}; - char *endptr; - -- if (ctx->profile->major_version < 4) -+ if (shader_profile_version_lt(ctx, 4, 0)) - return reservation; - - reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); -@@ -1097,20 +1135,50 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - { - struct hlsl_ir_constant *constant; - struct hlsl_ir_node *node; -+ struct hlsl_block expr; - unsigned int ret = 0; - bool progress; - -- if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), -+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ switch (node->type) -+ { -+ case 
HLSL_IR_CONSTANT: -+ case HLSL_IR_EXPR: -+ case HLSL_IR_SWIZZLE: -+ case HLSL_IR_LOAD: -+ case HLSL_IR_INDEX: -+ continue; -+ case HLSL_IR_CALL: -+ case HLSL_IR_IF: -+ case HLSL_IR_LOOP: -+ case HLSL_IR_JUMP: -+ case HLSL_IR_RESOURCE_LOAD: -+ case HLSL_IR_RESOURCE_STORE: -+ case HLSL_IR_STORE: -+ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Expected literal expression."); -+ } -+ } -+ -+ if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) -+ return 0; -+ hlsl_block_add_block(&expr, block); -+ -+ if (!add_implicit_conversion(ctx, &expr.instrs, node_from_block(&expr), - hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) -+ { -+ hlsl_block_cleanup(&expr); - return 0; -+ } - - do - { -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); -- progress |= hlsl_copy_propagation_execute(ctx, block); -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, &expr); - } while (progress); - -- node = node_from_list(&block->instrs); -+ node = node_from_block(&expr); - if (node->type == HLSL_IR_CONSTANT) - { - constant = hlsl_ir_constant(node); -@@ -1119,9 +1187,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - else - { - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "Failed to evaluate constant expression %d.", node->type); -+ "Failed to evaluate constant expression."); - } - -+ hlsl_block_cleanup(&expr); -+ - return ret; - } - -@@ -1284,7 +1354,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, - { - if (operands[j]) - { -- if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, instrs, operands[j], i, loc))) - return NULL; - - cell_operands[j] = load; -@@ -1334,23 +1404,23 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * - } - } - --static struct hlsl_ir_node 
*add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; - -- return add_expr(ctx, instrs, op, args, arg->data_type, loc); -+ return add_expr(ctx, block_to_list(block), op, args, arg->data_type, loc); - } - --static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { - check_integer_type(ctx, arg); - -- return add_unary_arithmetic_expr(ctx, instrs, op, arg, loc); -+ return add_unary_arithmetic_expr(ctx, block, op, arg, loc); - } - --static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; -@@ -1359,10 +1429,10 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct - bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, - arg->data_type->dimx, arg->data_type->dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block_to_list(block), arg, bool_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, bool_type, loc); -+ return add_expr(ctx, block_to_list(block), op, args, bool_type, loc); - } - - static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, -@@ -1378,7 +1448,7 @@ static struct 
hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const str - return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - } - --static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -@@ -1387,49 +1457,26 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str - - common_type = get_common_numeric_type(ctx, arg1, arg2, loc); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block_to_list(block), arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block_to_list(block), arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, common_type, loc); -+ return add_expr(ctx, block_to_list(block), op, args, common_type, loc); - } - --static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, loc); -- return list1; --} -- --static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { - check_integer_type(ctx, arg1); - check_integer_type(ctx, arg2); - -- return 
add_binary_arithmetic_expr(ctx, instrs, op, arg1, arg2, loc); --} -- --static struct list *add_binary_bitwise_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_bitwise_expr(ctx, list1, op, arg1, arg2, loc); -- -- return list1; -+ return add_binary_arithmetic_expr(ctx, block, op, arg1, arg2, loc); - } - --static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -@@ -1445,27 +1492,16 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str - common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block_to_list(block), arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block_to_list(block), arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, return_type, loc); --} -- --static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); 
-- return list1; -+ return add_expr(ctx, block_to_list(block), op, args, return_type, loc); - } - --static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -@@ -1479,28 +1515,16 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct - - common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block_to_list(block), arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block_to_list(block), arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, common_type, loc); -+ return add_expr(ctx, block_to_list(block), op, args, common_type, loc); - } - --static struct list *add_binary_logical_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_logical_expr(ctx, list1, op, arg1, arg2, loc); -- -- return list1; --} -- --static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -@@ -1522,28 +1546,16 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l - 
return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, return_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block_to_list(block), arg1, return_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, integer_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block_to_list(block), arg2, integer_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, return_type, loc); -+ return add_expr(ctx, block_to_list(block), op, args, return_type, loc); - } - --static struct list *add_binary_shift_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_shift_expr(ctx, list1, op, arg1, arg2, loc); -- -- return list1; --} -- --static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) - { - enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); -@@ -1557,8 +1569,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, arg1->data_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid type %s.\n", string->buffer); -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return NULL; - } -@@ -1568,8 +1579,7 @@ static 
struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, arg2->data_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid type %s.\n", string->buffer); -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return NULL; - } -@@ -1589,13 +1599,60 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis - common_type = hlsl_get_vector_type(ctx, base, dim); - ret_type = hlsl_get_scalar_type(ctx, base); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block_to_list(instrs), arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block_to_list(instrs), arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, ret_type, loc); -+ return add_expr(ctx, block_to_list(instrs), op, args, ret_type, loc); -+} -+ -+static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, -+ struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); -+ -+ hlsl_block_add_block(block1, block2); -+ destroy_block(block2); -+ -+ switch (op) -+ { -+ case HLSL_OP2_ADD: -+ case HLSL_OP2_DIV: -+ case HLSL_OP2_MOD: -+ case HLSL_OP2_MUL: -+ add_binary_arithmetic_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_BIT_AND: -+ case HLSL_OP2_BIT_OR: -+ case HLSL_OP2_BIT_XOR: -+ add_binary_bitwise_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_LESS: -+ case HLSL_OP2_GEQUAL: -+ case HLSL_OP2_EQUAL: -+ case HLSL_OP2_NEQUAL: -+ 
add_binary_comparison_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_LOGIC_AND: -+ case HLSL_OP2_LOGIC_OR: -+ add_binary_logical_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_LSHIFT: -+ case HLSL_OP2_RSHIFT: -+ add_binary_shift_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ return block1; - } - - static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) -@@ -1663,7 +1720,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - - if (assign_op == ASSIGN_OP_SUB) - { -- if (!(rhs = add_unary_arithmetic_expr(ctx, instrs, HLSL_OP1_NEG, rhs, &rhs->loc))) -+ if (!(rhs = add_unary_arithmetic_expr(ctx, list_to_block(instrs), HLSL_OP1_NEG, rhs, &rhs->loc))) - return NULL; - assign_op = ASSIGN_OP_ADD; - } -@@ -1672,7 +1729,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - enum hlsl_ir_expr_op op = op_from_assignment(assign_op); - - assert(op); -- if (!(rhs = add_binary_arithmetic_expr(ctx, instrs, op, lhs, rhs, &rhs->loc))) -+ if (!(rhs = add_binary_arithmetic_expr(ctx, list_to_block(instrs), op, lhs, rhs, &rhs->loc))) - return NULL; - } - -@@ -1779,7 +1836,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - return NULL; - list_add_tail(instrs, &cell->entry); - -- if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) -+ if (!(load = hlsl_add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) - return NULL; - - if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) -@@ -1820,10 +1877,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - return copy; - } - --static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, -+static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post, - const struct vkd3d_shader_location *loc) - { -- struct 
hlsl_ir_node *lhs = node_from_list(instrs); -+ struct hlsl_ir_node *lhs = node_from_block(block); - struct hlsl_ir_node *one; - - if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) -@@ -1832,9 +1889,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem - - if (!(one = hlsl_new_int_constant(ctx, 1, loc))) - return false; -- list_add_tail(instrs, &one->entry); -+ hlsl_block_add_instr(block, one); - -- if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) -+ if (!add_assignment(ctx, block_to_list(block), lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) - return false; - - if (post) -@@ -1843,7 +1900,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem - - if (!(copy = hlsl_new_copy(ctx, lhs))) - return false; -- list_add_tail(instrs, ©->entry); -+ hlsl_block_add_instr(block, copy); - - /* Post increment/decrement expressions are considered const. */ - if (!(copy->data_type = hlsl_type_clone(ctx, copy->data_type, 0, HLSL_MODIFIER_CONST))) -@@ -1853,7 +1910,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem - return true; - } - --static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, -+static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) - { - unsigned int src_comp_count = hlsl_type_component_count(src->data_type); -@@ -1868,17 +1925,17 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_type *dst_comp_type; - struct hlsl_block block; - -- if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) -+ if (!(load = hlsl_add_load_component(ctx, block_to_list(instrs), src, k, &src->loc))) - return; - - dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - -- if (!(conv = add_implicit_conversion(ctx, instrs, load, 
dst_comp_type, &src->loc))) -+ if (!(conv = add_implicit_conversion(ctx, block_to_list(instrs), load, dst_comp_type, &src->loc))) - return; - - if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) - return; -- list_move_tail(instrs, &block.instrs); -+ hlsl_block_add_block(instrs, &block); - - ++*store_index; - } -@@ -1924,211 +1981,234 @@ static bool type_has_numeric_components(struct hlsl_type *type) - return false; - } - --static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, -- unsigned int modifiers, const struct vkd3d_shader_location *modifiers_loc, struct list *var_list) -+static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int modifiers, -+ const struct vkd3d_shader_location *loc) - { -- struct parse_variable_def *v, *v_next; -+ modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); -+ if (modifiers) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_modifiers_to_string(ctx, modifiers))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ } -+} -+ -+static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) -+{ -+ struct hlsl_type *basic_type = v->basic_type; - struct hlsl_ir_function_decl *func; -- unsigned int invalid_modifiers; -- struct list *statements_list; -+ struct hlsl_semantic new_semantic; -+ uint32_t modifiers = v->modifiers; -+ bool unbounded_res_array = false; - struct hlsl_ir_var *var; - struct hlsl_type *type; - bool local = true; -+ char *var_name; -+ unsigned int i; -+ -+ assert(basic_type); - - if (basic_type->class == HLSL_CLASS_MATRIX) - assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - -- if (!(statements_list = make_empty_list(ctx))) -- { -- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) -- free_parse_variable_def(v); -- vkd3d_free(var_list); 
-- return NULL; -- } -- -- if (!var_list) -- return statements_list; -+ type = basic_type; - -- invalid_modifiers = modifiers & (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); -- if (invalid_modifiers) -+ if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) - { -- struct vkd3d_string_buffer *string; -- -- if ((string = hlsl_modifiers_to_string(ctx, invalid_modifiers))) -- hlsl_error(ctx, modifiers_loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); -- hlsl_release_string_buffer(ctx, string); -+ for (i = 0; i < v->arrays.count; ++i) -+ unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); - } - -- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) -+ if (unbounded_res_array) - { -- bool unbounded_res_array = false; -- unsigned int i; -- -- type = basic_type; -- -- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) -+ if (v->arrays.count == 1) - { -- for (i = 0; i < v->arrays.count; ++i) -- unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -+ hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); -+ return; - } -- -- if (unbounded_res_array) -+ else - { -- if (v->arrays.count == 1) -- { -- hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); -- free_parse_variable_def(v); -- continue; -- } -- else -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Unbounded resource arrays cannot be multi-dimensional."); -- } -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Unbounded resource arrays cannot be multi-dimensional."); - } -- else -+ } -+ else -+ { -+ for (i = 0; i < v->arrays.count; ++i) - { -- for (i = 0; i < v->arrays.count; ++i) -+ if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) - { -- if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) -- { -- unsigned int size = initializer_size(&v->initializer); -- unsigned int 
elem_components = hlsl_type_component_count(type); -- -- if (i < v->arrays.count - 1) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Only innermost array size can be implicit."); -- free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -- } -- else if (elem_components == 0) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Cannot declare an implicit size array of a size 0 type."); -- free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -- } -- else if (size == 0) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Implicit size arrays need to be initialized."); -- free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -+ unsigned int size = initializer_size(&v->initializer); -+ unsigned int elem_components = hlsl_type_component_count(type); - -- } -- else if (size % elem_components != 0) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Cannot initialize implicit size array with %u components, expected a multiple of %u.", -- size, elem_components); -- free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -- } -- else -- { -- v->arrays.sizes[i] = size / elem_components; -- } -+ if (i < v->arrays.count - 1) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Only innermost array size can be implicit."); -+ v->initializer.args_count = 0; -+ } -+ else if (elem_components == 0) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Cannot declare an implicit size array of a size 0 type."); -+ v->initializer.args_count = 0; -+ } -+ else if (size == 0) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Implicit size arrays need to be initialized."); -+ v->initializer.args_count = 0; -+ } -+ else if (size % elem_components != 0) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ 
"Cannot initialize implicit size array with %u components, expected a multiple of %u.", -+ size, elem_components); -+ v->initializer.args_count = 0; -+ } -+ else -+ { -+ v->arrays.sizes[i] = size / elem_components; - } -- type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); - } -+ type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); - } -- vkd3d_free(v->arrays.sizes); -- -- if (!(var = hlsl_new_var(ctx, v->name, type, &v->loc, &v->semantic, modifiers, &v->reg_reservation))) -- { -- free_parse_variable_def(v); -- continue; -- } -+ } - -- var->buffer = ctx->cur_buffer; -+ if (!(var_name = vkd3d_strdup(v->name))) -+ return; - -- if (var->buffer == ctx->globals_buffer) -+ new_semantic = v->semantic; -+ if (v->semantic.name) -+ { -+ if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) - { -- if (var->reg_reservation.offset_type) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -- "packoffset() is only allowed inside constant buffer declarations."); -+ vkd3d_free(var_name); -+ return; - } -+ } - -- if (ctx->cur_scope == ctx->globals) -- { -- local = false; -+ if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) -+ { -+ hlsl_cleanup_semantic(&new_semantic); -+ vkd3d_free(var_name); -+ return; -+ } - -- if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); -+ var->buffer = ctx->cur_buffer; - -- /* Mark it as uniform. We need to do this here since synthetic -- * variables also get put in the global scope, but shouldn't be -- * considered uniforms, and we have no way of telling otherwise. 
*/ -- if (!(modifiers & HLSL_STORAGE_STATIC)) -- var->storage_modifiers |= HLSL_STORAGE_UNIFORM; -+ if (var->buffer == ctx->globals_buffer) -+ { -+ if (var->reg_reservation.offset_type) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "packoffset() is only allowed inside constant buffer declarations."); -+ } - -- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && -- type_has_object_components(var->data_type, true)) -- { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Target profile doesn't support objects as struct members in uniform variables.\n"); -- } -+ if (ctx->cur_scope == ctx->globals) -+ { -+ local = false; - -- if ((func = hlsl_get_func_decl(ctx, var->name))) -- { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -- "'%s' is already defined as a function.", var->name); -- hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, -- "'%s' was previously defined here.", var->name); -- } -- } -- else -- { -- static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED -- | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; -+ if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); - -- if (modifiers & invalid) -- { -- struct vkd3d_string_buffer *string; -+ /* Mark it as uniform. We need to do this here since synthetic -+ * variables also get put in the global scope, but shouldn't be -+ * considered uniforms, and we have no way of telling otherwise. 
*/ -+ if (!(modifiers & HLSL_STORAGE_STATIC)) -+ var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - -- if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Modifiers '%s' are not allowed on local variables.", string->buffer); -- hlsl_release_string_buffer(ctx, string); -- } -- if (var->semantic.name) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -- "Semantics are not allowed on local variables."); -+ if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && -+ type_has_object_components(var->data_type, true)) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Target profile doesn't support objects as struct members in uniform variables."); - } - -- if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) -- && type_has_object_components(var->data_type, false)) -+ if ((func = hlsl_get_func_decl(ctx, var->name))) - { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Static variables cannot have both numeric and resource components."); -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -+ "'%s' is already defined as a function.", var->name); -+ hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, -+ "'%s' was previously defined here.", var->name); - } -+ } -+ else -+ { -+ static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED -+ | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; - -- if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count -- && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) -+ if (modifiers & invalid) - { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, -- "Const variable \"%s\" is missing an initializer.", var->name); -- hlsl_free_var(var); -- free_parse_initializer(&v->initializer); -- vkd3d_free(v); -- continue; -+ struct 
vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Modifiers '%s' are not allowed on local variables.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); - } -+ if (var->semantic.name) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -+ "Semantics are not allowed on local variables."); -+ } - -- if (!hlsl_add_var(ctx, var, local)) -+ if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) -+ && type_has_object_components(var->data_type, false)) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Static variables cannot have both numeric and resource components."); -+ } -+ -+ if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count -+ && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, -+ "Const variable \"%s\" is missing an initializer.", var->name); -+ hlsl_free_var(var); -+ return; -+ } -+ -+ if (!hlsl_add_var(ctx, var, local)) -+ { -+ struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); -+ -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -+ "Variable \"%s\" was already declared in this scope.", var->name); -+ hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); -+ hlsl_free_var(var); -+ return; -+ } -+} -+ -+static struct list *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) -+{ -+ struct parse_variable_def *v, *v_next; -+ struct list *statements_list; -+ struct hlsl_ir_var *var; -+ struct hlsl_type *type; -+ -+ if (!(statements_list = make_empty_list(ctx))) -+ { -+ LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) - { -- struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); -+ 
free_parse_variable_def(v); -+ } -+ vkd3d_free(var_list); -+ return NULL; -+ } - -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -- "Variable \"%s\" was already declared in this scope.", var->name); -- hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); -- hlsl_free_var(var); -- free_parse_initializer(&v->initializer); -- vkd3d_free(v); -+ LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) -+ { -+ /* If this fails, the variable failed to be declared. */ -+ if (!(var = hlsl_get_var(ctx->cur_scope, v->name))) -+ { -+ free_parse_variable_def(v); - continue; - } -+ type = var->data_type; - - if (v->initializer.args_count) - { -@@ -2143,8 +2223,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Expected %u components in initializer, but got %u.", - hlsl_type_component_count(type), size); -- free_parse_initializer(&v->initializer); -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - -@@ -2159,16 +2238,14 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); - - assert(v->initializer.args_count == 1); -- list_add_tail(v->initializer.instrs, &load->node.entry); -- add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); -+ hlsl_block_add_instr(v->initializer.instrs, &load->node); -+ add_assignment(ctx, block_to_list(v->initializer.instrs), &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); - } - -- if (modifiers & HLSL_STORAGE_STATIC) -- list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); -+ if (var->storage_modifiers & HLSL_STORAGE_STATIC) -+ hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); - else -- list_move_tail(statements_list, v->initializer.instrs); -- 
vkd3d_free(v->initializer.args); -- vkd3d_free(v->initializer.instrs); -+ list_move_tail(statements_list, &v->initializer.instrs->instrs); - } - else if (var->storage_modifiers & HLSL_STORAGE_STATIC) - { -@@ -2178,32 +2255,33 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - - if (type_has_object_components(var->data_type, false)) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - - if (!(zero = hlsl_new_uint_constant(ctx, 0, &var->loc))) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - hlsl_block_add_instr(&ctx->static_initializers, zero); - - if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - - if (!(store = hlsl_new_simple_store(ctx, var, cast))) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - hlsl_block_add_instr(&ctx->static_initializers, store); - } -- vkd3d_free(v); -+ free_parse_variable_def(v); - } -+ - vkd3d_free(var_list); - return statements_list; - } -@@ -2286,7 +2364,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, - return arg; - - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -- return add_implicit_conversion(ctx, params->instrs, arg, type, loc); -+ return add_implicit_conversion(ctx, block_to_list(params->instrs), arg, type, loc); - } - - static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, -@@ -2298,7 +2376,7 @@ static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *p - { - struct hlsl_ir_node *new_arg; - -- if (!(new_arg = add_implicit_conversion(ctx, params->instrs, params->args[i], type, loc))) -+ if (!(new_arg = add_implicit_conversion(ctx, block_to_list(params->instrs), params->args[i], type, loc))) - return false; - params->args[i] = new_arg; - } -@@ -2394,18 +2472,18 @@ static bool 
intrinsic_all(struct hlsl_ctx *ctx, - - if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) - return false; -- list_add_tail(params->instrs, &one->entry); -+ hlsl_block_add_instr(params->instrs, one); - - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; -- list_add_tail(params->instrs, &zero->entry); -+ hlsl_block_add_instr(params->instrs, zero); - - mul = one; - - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { -- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, i, loc))) - return false; - - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) -@@ -2431,7 +2509,7 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, - { - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; -- list_add_tail(params->instrs, &zero->entry); -+ hlsl_block_add_instr(params->instrs, zero); - - if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) - return false; -@@ -2442,14 +2520,14 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, - { - if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) - return false; -- list_add_tail(params->instrs, &bfalse->entry); -+ hlsl_block_add_instr(params->instrs, bfalse); - - or = bfalse; - - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { -- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, i, loc))) - return false; - - if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) -@@ -2491,7 +2569,7 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, - data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); - - operands[0] = params->args[0]; -- return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); -+ return 
add_expr(ctx, block_to_list(params->instrs), HLSL_OP1_REINTERPRET, operands, data_type, loc); - } - - static bool intrinsic_asuint(struct hlsl_ctx *ctx, -@@ -2527,7 +2605,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, - data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); - - operands[0] = params->args[0]; -- return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); -+ return add_expr(ctx, block_to_list(params->instrs), HLSL_OP1_REINTERPRET, operands, data_type, loc); - } - - static bool intrinsic_clamp(struct hlsl_ctx *ctx, -@@ -2544,6 +2622,34 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc); - } - -+static bool intrinsic_clip(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *condition, *jump; -+ -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -+ return false; -+ -+ condition = params->args[0]; -+ -+ if (ctx->profile->major_version < 4 && hlsl_type_component_count(condition->data_type) > 4) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, condition->data_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Argument type cannot exceed 4 components, got type \"%s\".", string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+ } -+ -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NEG, condition, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, jump); -+ -+ return true; -+} -+ - static bool intrinsic_cos(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2571,34 +2677,34 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, - - cast_type = hlsl_get_vector_type(ctx, base, 3); - -- if (!(arg1_cast = add_implicit_conversion(ctx, params->instrs, arg1, 
cast_type, loc))) -+ if (!(arg1_cast = add_implicit_conversion(ctx, block_to_list(params->instrs), arg1, cast_type, loc))) - return false; - -- if (!(arg2_cast = add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc))) -+ if (!(arg2_cast = add_implicit_conversion(ctx, block_to_list(params->instrs), arg2, cast_type, loc))) - return false; - - if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg1_swzl1->entry); -+ hlsl_block_add_instr(params->instrs, arg1_swzl1); - - if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg2_swzl1->entry); -+ hlsl_block_add_instr(params->instrs, arg2_swzl1); - - if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) - return false; - - if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) - return false; -- list_add_tail(params->instrs, &mul1_neg->entry); -+ hlsl_block_add_instr(params->instrs, mul1_neg); - - if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg1_swzl2->entry); -+ hlsl_block_add_instr(params->instrs, arg1_swzl2); - - if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg2_swzl2->entry); -+ hlsl_block_add_instr(params->instrs, arg2_swzl2); - - if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) - return false; -@@ -2617,6 +2723,28 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); - } - -+static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if 
(!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); -+} -+ -+static bool intrinsic_ddx_fine(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); -+} -+ - static bool intrinsic_ddy(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2628,6 +2756,28 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); - } - -+static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); -+} -+ -+static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); -+} -+ - static bool intrinsic_distance(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2668,7 +2818,7 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, - /* 1/ln(2) */ - if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) - return false; -- list_add_tail(params->instrs, &coeff->entry); -+ 
hlsl_block_add_instr(params->instrs, coeff); - - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) - return false; -@@ -2715,7 +2865,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer - - if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) - return false; -- list_add_tail(params->instrs, &zero->entry); -+ hlsl_block_add_instr(params->instrs, zero); - - if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) - return false; -@@ -2729,7 +2879,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer - if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, div, zero, loc))) - return false; - -- if (!(select = hlsl_add_conditional(ctx, params->instrs, ge, frac, neg_frac))) -+ if (!(select = hlsl_add_conditional(ctx, block_to_list(params->instrs), ge, frac, neg_frac))) - return false; - - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc); -@@ -2806,7 +2956,7 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, - } - - static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, -- struct list *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, -+ struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *log, *mul; -@@ -2861,15 +3011,15 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, - init_value.u[3].f = 1.0f; - if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) - return false; -- list_add_tail(params->instrs, &init->entry); -+ hlsl_block_add_instr(params->instrs, init); - - if (!(store = hlsl_new_simple_store(ctx, var, init))) - return false; -- list_add_tail(params->instrs, &store->entry); -+ hlsl_block_add_instr(params->instrs, store); - - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; -- 
list_add_tail(params->instrs, &zero->entry); -+ hlsl_block_add_instr(params->instrs, zero); - - /* Diffuse component. */ - if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) -@@ -2877,7 +3027,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, - - if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) - return false; -- list_move_tail(params->instrs, &block.instrs); -+ hlsl_block_add_block(params->instrs, &block); - - /* Specular component. */ - if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) -@@ -2892,16 +3042,16 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, - if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) - return false; - -- if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, zero, specular_pow))) -+ if (!(load = hlsl_add_conditional(ctx, block_to_list(params->instrs), specular_or, zero, specular_pow))) - return false; - - if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) - return false; -- list_move_tail(params->instrs, &block.instrs); -+ hlsl_block_add_block(params->instrs, &block); - - if (!(var_load = hlsl_new_var_load(ctx, var, loc))) - return false; -- list_add_tail(params->instrs, &var_load->node.entry); -+ hlsl_block_add_instr(params->instrs, &var_load->node); - - return true; - } -@@ -3013,10 +3163,10 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - ret_type = hlsl_get_scalar_type(ctx, base); - } - -- if (!(cast1 = add_implicit_conversion(ctx, params->instrs, arg1, cast_type1, loc))) -+ if (!(cast1 = add_implicit_conversion(ctx, block_to_list(params->instrs), arg1, cast_type1, loc))) - return false; - -- if (!(cast2 = add_implicit_conversion(ctx, params->instrs, arg2, cast_type2, loc))) -+ if (!(cast2 = add_implicit_conversion(ctx, block_to_list(params->instrs), arg2, cast_type2, loc))) - return false; - - if (!(var = hlsl_new_synthetic_var(ctx, "mul", matrix_type, loc))) -@@ -3034,10 
+3184,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *value1, *value2, *mul; - -- if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) -+ if (!(value1 = hlsl_add_load_component(ctx, block_to_list(params->instrs), -+ cast1, j * cast1->data_type->dimx + k, loc))) - return false; - -- if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) -+ if (!(value2 = hlsl_add_load_component(ctx, block_to_list(params->instrs), -+ cast2, k * cast2->data_type->dimx + i, loc))) - return false; - - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) -@@ -3056,15 +3208,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - - if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) - return false; -- list_move_tail(params->instrs, &block.instrs); -+ hlsl_block_add_block(params->instrs, &block); - } - } - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return false; -- list_add_tail(params->instrs, &load->node.entry); -+ hlsl_block_add_instr(params->instrs, &load->node); - -- return !!add_implicit_conversion(ctx, params->instrs, &load->node, ret_type, loc); -+ return !!add_implicit_conversion(ctx, block_to_list(params->instrs), &load->node, ret_type, loc); - } - - static bool intrinsic_normalize(struct hlsl_ctx *ctx, -@@ -3169,14 +3321,14 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, - - if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) - return false; -- list_add_tail(params->instrs, &zero->entry); -+ hlsl_block_add_instr(params->instrs, zero); - - /* Check if 0 < arg, cast bool to int */ - - if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, zero, arg, loc))) - return false; - -- if (!(op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) -+ if (!(op1 = 
add_implicit_conversion(ctx, block_to_list(params->instrs), lt, int_type, loc))) - return false; - - /* Check if arg < 0, cast bool to int and invert (meaning true is -1) */ -@@ -3184,7 +3336,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, - if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, zero, loc))) - return false; - -- if (!(op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) -+ if (!(op2 = add_implicit_conversion(ctx, block_to_list(params->instrs), lt, int_type, loc))) - return false; - - if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) -@@ -3229,7 +3381,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, - - if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) - return false; -- list_add_tail(params->instrs, &one->entry); -+ hlsl_block_add_instr(params->instrs, one); - - if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) - return false; -@@ -3242,11 +3394,11 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, - - if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) - return false; -- list_add_tail(params->instrs, &minus_two->entry); -+ hlsl_block_add_instr(params->instrs, minus_two); - - if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) - return false; -- list_add_tail(params->instrs, &three->entry); -+ hlsl_block_add_instr(params->instrs, three); - - if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) - return false; -@@ -3289,7 +3441,7 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, - - type = ge->data_type; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -- return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); -+ return !!add_implicit_conversion(ctx, block_to_list(params->instrs), ge, type, loc); - } - - static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer 
*params, -@@ -3308,7 +3460,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - - if (params->args_count == 4) - { -- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented.\n"); -+ hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); - } - - sampler_type = params->args[0]->data_type; -@@ -3324,7 +3476,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - hlsl_release_string_buffer(ctx, string); - } - -- if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -+ if (!(coords = add_implicit_conversion(ctx, block_to_list(params->instrs), params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) - coords = params->args[1]; - -@@ -3335,7 +3487,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(params->instrs, &load->entry); -+ hlsl_block_add_instr(params->instrs, load); - return true; - } - -@@ -3369,7 +3521,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - - if ((string = hlsl_type_to_string(ctx, arg_type))) - hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.\n", -+ "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.", - string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; -@@ -3377,7 +3529,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - - if (arg_type->class == HLSL_CLASS_SCALAR) - { -- list_add_tail(params->instrs, &arg->entry); -+ hlsl_block_add_instr(params->instrs, arg); - return true; - } - -@@ -3393,18 +3545,18 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - { - struct hlsl_block block; - -- if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx 
+ i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, j * arg->data_type->dimx + i, loc))) - return false; - - if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) - return false; -- list_move_tail(params->instrs, &block.instrs); -+ hlsl_block_add_block(params->instrs, &block); - } - } - - if (!(var_load = hlsl_new_var_load(ctx, var, loc))) - return false; -- list_add_tail(params->instrs, &var_load->node.entry); -+ hlsl_block_add_instr(params->instrs, &var_load->node); - - return true; - } -@@ -3444,13 +3596,13 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - - if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) - return false; -- list_add_tail(params->instrs, &c->entry); -+ hlsl_block_add_instr(params->instrs, c); - - if (arg_type->class == HLSL_CLASS_VECTOR) - { - if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) - return false; -- list_add_tail(params->instrs, &swizzle->entry); -+ hlsl_block_add_instr(params->instrs, swizzle); - - arg = swizzle; - } -@@ -3458,7 +3610,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) - return false; - -- if (ctx->profile->major_version >= 4) -+ if (shader_profile_version_ge(ctx, 4, 0)) - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); - - return true; -@@ -3482,10 +3634,15 @@ intrinsic_functions[] = - {"asfloat", 1, true, intrinsic_asfloat}, - {"asuint", -1, true, intrinsic_asuint}, - {"clamp", 3, true, intrinsic_clamp}, -+ {"clip", 1, true, intrinsic_clip}, - {"cos", 1, true, intrinsic_cos}, - {"cross", 2, true, intrinsic_cross}, - {"ddx", 1, true, intrinsic_ddx}, -+ {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, -+ {"ddx_fine", 1, true, intrinsic_ddx_fine}, - {"ddy", 1, true, intrinsic_ddy}, -+ {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, -+ 
{"ddy_fine", 1, true, intrinsic_ddy_fine}, - {"distance", 2, true, intrinsic_distance}, - {"dot", 2, true, intrinsic_dot}, - {"exp", 1, true, intrinsic_exp}, -@@ -3549,7 +3706,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - { - struct hlsl_ir_node *cast; - -- if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) -+ if (!(cast = add_cast(ctx, block_to_list(args->instrs), arg, param->data_type, &arg->loc))) - goto fail; - args->args[i] = cast; - arg = cast; -@@ -3561,13 +3718,13 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - - if (!(store = hlsl_new_simple_store(ctx, param, arg))) - goto fail; -- list_add_tail(args->instrs, &store->entry); -+ hlsl_block_add_instr(args->instrs, store); - } - } - - if (!(call = hlsl_new_call(ctx, decl, loc))) - goto fail; -- list_add_tail(args->instrs, &call->entry); -+ hlsl_block_add_instr(args->instrs, call); - - for (i = 0; i < decl->parameters.count; ++i) - { -@@ -3584,9 +3741,9 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - - if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) - goto fail; -- list_add_tail(args->instrs, &load->node.entry); -+ hlsl_block_add_instr(args->instrs, &load->node); - -- if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) -+ if (!add_assignment(ctx, block_to_list(args->instrs), arg, ASSIGN_OP_ASSIGN, &load->node)) - goto fail; - } - } -@@ -3597,7 +3754,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - - if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) - goto fail; -- list_add_tail(args->instrs, &load->node.entry); -+ hlsl_block_add_instr(args->instrs, &load->node); - } - else - { -@@ -3606,7 +3763,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - - if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc))) - goto fail; -- list_add_tail(args->instrs, &expr->entry); -+ 
hlsl_block_add_instr(args->instrs, expr); - } - } - else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), -@@ -3655,14 +3812,14 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - goto fail; - } - vkd3d_free(args->args); -- return args->instrs; -+ return block_to_list(args->instrs); - - fail: - free_parse_initializer(args); - return NULL; - } - --static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, -+static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, - struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_load *load; -@@ -3692,7 +3849,7 @@ static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -- list_add_tail(params->instrs, &load->node.entry); -+ hlsl_block_add_instr(params->instrs, &load->node); - - vkd3d_free(params->args); - return params->instrs; -@@ -4272,6 +4429,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - char *name; - DWORD modifiers; - struct hlsl_ir_node *instr; -+ struct hlsl_block *block; - struct list *list; - struct parse_fields fields; - struct parse_function function; -@@ -4399,38 +4557,13 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %token C_INTEGER - %token PRE_LINE - --%type add_expr --%type assignment_expr --%type bitand_expr --%type bitor_expr --%type bitxor_expr --%type compound_statement --%type conditional_expr - %type declaration - %type declaration_statement --%type discard_statement --%type equality_expr --%type expr --%type expr_optional --%type expr_statement --%type initializer_expr --%type jump_statement --%type logicand_expr --%type logicor_expr --%type loop_statement --%type mul_expr --%type postfix_expr - %type primary_expr --%type relational_expr --%type selection_statement --%type shift_expr 
--%type statement --%type statement_list --%type struct_declaration -+%type struct_declaration_without_vars - %type type_specs --%type unary_expr - %type variables_def --%type variables_def_optional -+%type variables_def_typed - - %token VAR_IDENTIFIER - %token NEW_IDENTIFIER -@@ -4446,6 +4579,31 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %type attribute_list - %type attribute_list_optional - -+%type add_expr -+%type assignment_expr -+%type bitand_expr -+%type bitor_expr -+%type bitxor_expr -+%type compound_statement -+%type conditional_expr -+%type equality_expr -+%type expr -+%type expr_optional -+%type expr_statement -+%type initializer_expr -+%type jump_statement -+%type logicand_expr -+%type logicor_expr -+%type loop_statement -+%type mul_expr -+%type postfix_expr -+%type relational_expr -+%type shift_expr -+%type selection_statement -+%type statement -+%type statement_list -+%type unary_expr -+ - %type boolean - - %type buffer_type -@@ -4493,6 +4651,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %type type_spec - %type variable_decl - %type variable_def -+%type variable_def_typed - - %% - -@@ -4561,25 +4720,19 @@ preproc_directive: - } - } - --struct_declaration: -- var_modifiers struct_spec variables_def_optional ';' -+struct_declaration_without_vars: -+ var_modifiers struct_spec ';' - { -- struct hlsl_type *type; -- unsigned int modifiers = $1; -+ if (!$2->name) -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Anonymous struct type must declare a variable."); - -- if (!$3) -- { -- if (!$2->name) -- hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "Anonymous struct type must declare a variable."); -- if (modifiers) -- hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Modifiers are not allowed on struct type declarations."); -- } -+ if ($1) -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Modifiers are not 
allowed on struct type declarations."); - -- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ if (!($$ = make_empty_list(ctx))) - YYABORT; -- $$ = declare_vars(ctx, type, modifiers, &@1, $3); - } - - struct_spec: -@@ -4702,7 +4855,7 @@ attribute: - } - $$->name = $2; - list_init(&$$->instrs); -- list_move_tail(&$$->instrs, $4.instrs); -+ list_move_tail(&$$->instrs, &$4.instrs->instrs); - vkd3d_free($4.instrs); - $$->loc = @$; - $$->args_count = $4.args_count; -@@ -4758,15 +4911,15 @@ func_declaration: - "Function \"%s\" is already defined.", decl->func->name); - hlsl_note(ctx, &decl->loc, VKD3D_SHADER_LOG_ERROR, - "\"%s\" was previously defined here.", decl->func->name); -- hlsl_free_instr_list($2); -+ destroy_block($2); - } - else - { - size_t i; - - decl->has_body = true; -- list_move_tail(&decl->body.instrs, $2); -- vkd3d_free($2); -+ hlsl_block_add_block(&decl->body, $2); -+ destroy_block($2); - - /* Semantics are taken from whichever definition has a body. - * We can't just replace the hlsl_ir_var pointers, though: if -@@ -4943,7 +5096,7 @@ func_prototype: - compound_statement: - '{' '}' - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - } - | '{' scope_start statement_list '}' -@@ -5261,7 +5414,12 @@ type_no_void: - { - validate_texture_format_type(ctx, $3, &@3); - -- /* TODO: unspecified sample count is not allowed for all targets */ -+ if (shader_profile_version_lt(ctx, 4, 1)) -+ { -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); -+ } -+ - $$ = hlsl_new_texture_type(ctx, $1, $3, 0); - } - | texture_ms_type '<' type ',' shift_expr '>' -@@ -5270,7 +5428,7 @@ type_no_void: - struct hlsl_block block; - - hlsl_block_init(&block); -- list_move_tail(&block.instrs, $5); -+ hlsl_block_add_block(&block, $5); - - sample_count = evaluate_static_expression_as_uint(ctx, &block, &@5); - -@@ 
-5325,7 +5483,7 @@ type_no_void: - $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); - if ($$->is_minimum_precision) - { -- if (ctx->profile->major_version < 4) -+ if (shader_profile_version_lt(ctx, 4, 0)) - { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Target profile doesn't support minimum-precision types."); -@@ -5354,7 +5512,7 @@ type: - - declaration_statement: - declaration -- | struct_declaration -+ | struct_declaration_without_vars - | typedef - { - if (!($$ = make_empty_list(ctx))) -@@ -5416,23 +5574,12 @@ type_spec: - } - - declaration: -- var_modifiers type variables_def ';' -+ variables_def_typed ';' - { -- struct hlsl_type *type; -- unsigned int modifiers = $1; -- -- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ if (!($$ = initialize_vars(ctx, $1))) - YYABORT; -- $$ = declare_vars(ctx, type, modifiers, &@1, $3); - } - --variables_def_optional: -- %empty -- { -- $$ = NULL; -- } -- | variables_def -- - variables_def: - variable_def - { -@@ -5446,6 +5593,33 @@ variables_def: - list_add_tail($$, &$3->entry); - } - -+variables_def_typed: -+ variable_def_typed -+ { -+ if (!($$ = make_empty_list(ctx))) -+ YYABORT; -+ list_add_head($$, &$1->entry); -+ -+ declare_var(ctx, $1); -+ } -+ | variables_def_typed ',' variable_def -+ { -+ struct parse_variable_def *head_def; -+ -+ assert(!list_empty($1)); -+ head_def = LIST_ENTRY(list_head($1), struct parse_variable_def, entry); -+ -+ assert(head_def->basic_type); -+ $3->basic_type = head_def->basic_type; -+ $3->modifiers = head_def->modifiers; -+ $3->modifiers_loc = head_def->modifiers_loc; -+ -+ declare_var(ctx, $3); -+ -+ $$ = $1; -+ list_add_tail($$, &$3->entry); -+ } -+ - variable_decl: - any_identifier arrays colon_attribute - { -@@ -5461,7 +5635,7 @@ state: - any_identifier '=' expr ';' - { - vkd3d_free($1); -- hlsl_free_instr_list($3); -+ destroy_block($3); - } - - state_block_start: -@@ -5487,6 +5661,38 @@ variable_def: - ctx->in_state_block = 0; - } - 
-+variable_def_typed: -+ var_modifiers struct_spec variable_def -+ { -+ unsigned int modifiers = $1; -+ struct hlsl_type *type; -+ -+ if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ YYABORT; -+ -+ check_invalid_in_out_modifiers(ctx, modifiers, &@1); -+ -+ $$ = $3; -+ $$->basic_type = type; -+ $$->modifiers = modifiers; -+ $$->modifiers_loc = @1; -+ } -+ | var_modifiers type variable_def -+ { -+ unsigned int modifiers = $1; -+ struct hlsl_type *type; -+ -+ if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ YYABORT; -+ -+ check_invalid_in_out_modifiers(ctx, modifiers, &@1); -+ -+ $$ = $3; -+ $$->basic_type = type; -+ $$->modifiers = modifiers; -+ $$->modifiers_loc = @1; -+ } -+ - arrays: - %empty - { -@@ -5495,17 +5701,12 @@ arrays: - } - | '[' expr ']' arrays - { -- struct hlsl_block block; - uint32_t *new_array; - unsigned int size; - -- hlsl_clone_block(ctx, &block, &ctx->static_initializers); -- list_move_tail(&block.instrs, $2); -+ size = evaluate_static_expression_as_uint(ctx, $2, &@2); - -- size = evaluate_static_expression_as_uint(ctx, &block, &@2); -- -- hlsl_block_cleanup(&block); -- vkd3d_free($2); -+ destroy_block($2); - - $$ = $4; - -@@ -5618,10 +5819,10 @@ complex_initializer: - $$.args_count = 1; - if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) - { -- destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } -- $$.args[0] = node_from_list($1); -+ $$.args[0] = node_from_block($1); - $$.instrs = $1; - $$.braces = false; - } -@@ -5653,7 +5854,7 @@ complex_initializer_list: - $$.args = new_args; - for (i = 0; i < $3.args_count; ++i) - $$.args[$$.args_count++] = $3.args[i]; -- list_move_tail($$.instrs, $3.instrs); -+ hlsl_block_add_block($$.instrs, $3.instrs); - free_parse_initializer(&$3); - } - -@@ -5666,10 +5867,10 @@ initializer_expr_list: - $$.args_count = 1; - if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) - { -- destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } -- $$.args[0] = 
node_from_list($1); -+ $$.args[0] = node_from_block($1); - $$.instrs = $1; - $$.braces = false; - } -@@ -5681,13 +5882,13 @@ initializer_expr_list: - if (!(new_args = hlsl_realloc(ctx, $$.args, ($$.args_count + 1) * sizeof(*$$.args)))) - { - free_parse_initializer(&$$); -- destroy_instr_list($3); -+ destroy_block($3); - YYABORT; - } - $$.args = new_args; -- $$.args[$$.args_count++] = node_from_list($3); -- list_move_tail($$.instrs, $3); -- vkd3d_free($3); -+ $$.args[$$.args_count++] = node_from_block($3); -+ hlsl_block_add_block($$.instrs, $3); -+ destroy_block($3); - } - - boolean: -@@ -5705,15 +5906,17 @@ statement_list: - | statement_list statement - { - $$ = $1; -- list_move_tail($$, $2); -- vkd3d_free($2); -+ hlsl_block_add_block($$, $2); -+ destroy_block($2); - } - - statement: - declaration_statement -+ { -+ $$ = list_to_block($1); -+ } - | expr_statement - | compound_statement -- | discard_statement - | jump_statement - | selection_statement - | loop_statement -@@ -5721,47 +5924,47 @@ statement: - jump_statement: - KW_RETURN expr ';' - { -- if (!add_return(ctx, $2, node_from_list($2), &@1)) -- YYABORT; - $$ = $2; -+ if (!add_return(ctx, $$, node_from_block($$), &@1)) -+ YYABORT; - } - | KW_RETURN ';' - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - if (!add_return(ctx, $$, NULL, &@1)) - YYABORT; - } -- --discard_statement: -- KW_DISCARD ';' -+ | KW_DISCARD ';' - { -- struct hlsl_ir_node *discard; -+ struct hlsl_ir_node *discard, *c; - -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; -- if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) -+ -+ if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) -+ return false; -+ hlsl_block_add_instr($$, c); -+ -+ if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) - return false; -- list_add_tail($$, &discard->entry); -+ hlsl_block_add_instr($$, discard); - } - - selection_statement: - KW_IF '(' expr ')' if_body - { 
-- struct hlsl_ir_node *condition = node_from_list($3); -- struct hlsl_block then_block, else_block; -+ struct hlsl_ir_node *condition = node_from_block($3); - struct hlsl_ir_node *instr; - -- hlsl_block_init(&then_block); -- list_move_tail(&then_block.instrs, $5.then_block); -- hlsl_block_init(&else_block); -- if ($5.else_block) -- list_move_tail(&else_block.instrs, $5.else_block); -- vkd3d_free($5.then_block); -- vkd3d_free($5.else_block); -- -- if (!(instr = hlsl_new_if(ctx, condition, &then_block, &else_block, &@1))) -+ if (!(instr = hlsl_new_if(ctx, condition, $5.then_block, $5.else_block, &@1))) -+ { -+ destroy_block($5.then_block); -+ destroy_block($5.else_block); - YYABORT; -+ } -+ destroy_block($5.then_block); -+ destroy_block($5.else_block); - if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) - { - struct vkd3d_string_buffer *string; -@@ -5772,7 +5975,7 @@ selection_statement: - hlsl_release_string_buffer(ctx, string); - } - $$ = $3; -- list_add_tail($$, &instr->entry); -+ hlsl_block_add_instr($$, instr); - } - - if_body: -@@ -5803,14 +6006,14 @@ loop_statement: - } - | attribute_list_optional KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement - { -- $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); -+ $$ = create_loop(ctx, LOOP_FOR, &$1, list_to_block($5), $6, $7, $9, &@2); - hlsl_pop_scope(ctx); - } - - expr_optional: - %empty - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - } - | expr -@@ -5826,7 +6029,7 @@ func_arguments: - { - $$.args = NULL; - $$.args_count = 0; -- if (!($$.instrs = make_empty_list(ctx))) -+ if (!($$.instrs = make_empty_block(ctx))) - YYABORT; - $$.braces = false; - } -@@ -5880,7 +6083,7 @@ primary_expr: - } - | '(' expr ')' - { -- $$ = $2; -+ $$ = block_to_list($2); - } - | var_identifier '(' func_arguments ')' - { -@@ -5915,11 +6118,14 @@ primary_expr: - - postfix_expr: - primary_expr -+ { -+ $$ = list_to_block($1); -+ } - | 
postfix_expr OP_INC - { - if (!add_increment(ctx, $1, false, true, &@2)) - { -- destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } - $$ = $1; -@@ -5928,14 +6134,14 @@ postfix_expr: - { - if (!add_increment(ctx, $1, true, true, &@2)) - { -- destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } - $$ = $1; - } - | postfix_expr '.' any_identifier - { -- struct hlsl_ir_node *node = node_from_list($1); -+ struct hlsl_ir_node *node = node_from_block($1); - - if (node->data_type->class == HLSL_CLASS_STRUCT) - { -@@ -5963,7 +6169,7 @@ postfix_expr: - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3); - YYABORT; - } -- list_add_tail($1, &swizzle->entry); -+ hlsl_block_add_instr($1, swizzle); - $$ = $1; - } - else -@@ -5974,17 +6180,17 @@ postfix_expr: - } - | postfix_expr '[' expr ']' - { -- struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); -+ struct hlsl_ir_node *array = node_from_block($1), *index = node_from_block($3); - -- list_move_head($1, $3); -- vkd3d_free($3); -+ hlsl_block_add_block($3, $1); -+ destroy_block($1); - -- if (!add_array_access(ctx, $1, array, index, &@2)) -+ if (!add_array_access(ctx, block_to_list($3), array, index, &@2)) - { -- destroy_instr_list($1); -+ destroy_block($3); - YYABORT; - } -- $$ = $1; -+ $$ = $3; - } - - /* var_modifiers is necessary to avoid shift/reduce conflicts. */ -@@ -6025,14 +6231,14 @@ postfix_expr: - } - | postfix_expr '.' 
any_identifier '(' func_arguments ')' - { -- struct hlsl_ir_node *object = node_from_list($1); -+ struct hlsl_ir_node *object = node_from_block($1); - -- list_move_tail($1, $5.instrs); -+ hlsl_block_add_block($1, $5.instrs); - vkd3d_free($5.instrs); - -- if (!add_method_call(ctx, $1, object, $3, &$5, &@3)) -+ if (!add_method_call(ctx, block_to_list($1), object, $3, &$5, &@3)) - { -- hlsl_free_instr_list($1); -+ destroy_block($1); - vkd3d_free($5.args); - YYABORT; - } -@@ -6046,7 +6252,7 @@ unary_expr: - { - if (!add_increment(ctx, $2, false, false, &@1)) - { -- destroy_instr_list($2); -+ destroy_block($2); - YYABORT; - } - $$ = $2; -@@ -6055,7 +6261,7 @@ unary_expr: - { - if (!add_increment(ctx, $2, true, false, &@1)) - { -- destroy_instr_list($2); -+ destroy_block($2); - YYABORT; - } - $$ = $2; -@@ -6066,23 +6272,23 @@ unary_expr: - } - | '-' unary_expr - { -- add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_list($2), &@1); -+ add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_block($2), &@1); - $$ = $2; - } - | '~' unary_expr - { -- add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_list($2), &@1); -+ add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_block($2), &@1); - $$ = $2; - } - | '!' unary_expr - { -- add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_list($2), &@1); -+ add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_block($2), &@1); - $$ = $2; - } - /* var_modifiers is necessary to avoid shift/reduce conflicts. 
*/ - | '(' var_modifiers type arrays ')' unary_expr - { -- struct hlsl_type *src_type = node_from_list($6)->data_type; -+ struct hlsl_type *src_type = node_from_block($6)->data_type; - struct hlsl_type *dst_type; - unsigned int i; - -@@ -6118,9 +6324,9 @@ unary_expr: - YYABORT; - } - -- if (!add_cast(ctx, $6, node_from_list($6), dst_type, &@3)) -+ if (!add_cast(ctx, block_to_list($6), node_from_block($6), dst_type, &@3)) - { -- hlsl_free_instr_list($6); -+ destroy_block($6); - YYABORT; - } - $$ = $6; -@@ -6130,131 +6336,132 @@ mul_expr: - unary_expr - | mul_expr '*' unary_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); - } - | mul_expr '/' unary_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); - } - | mul_expr '%' unary_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); - } - - add_expr: - mul_expr - | add_expr '+' mul_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); - } - | add_expr '-' mul_expr - { - struct hlsl_ir_node *neg; - -- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), &@2))) -+ if (!(neg = add_unary_arithmetic_expr(ctx, $3, HLSL_OP1_NEG, node_from_block($3), &@2))) - YYABORT; -- list_add_tail($3, &neg->entry); -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); - } - - shift_expr: - add_expr - | shift_expr OP_LEFTSHIFT add_expr - { -- $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); - } - | shift_expr OP_RIGHTSHIFT add_expr - { -- $$ = add_binary_shift_expr_merge(ctx, $1, $3, 
HLSL_OP2_RSHIFT, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); - } - - relational_expr: - shift_expr - | relational_expr '<' shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); - } - | relational_expr '>' shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); -+ $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); - } - | relational_expr OP_LE shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); -+ $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); - } - | relational_expr OP_GE shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); - } - - equality_expr: - relational_expr - | equality_expr OP_EQ relational_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); - } - | equality_expr OP_NE relational_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); - } - - bitand_expr: - equality_expr - | bitand_expr '&' equality_expr - { -- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); - } - - bitxor_expr: - bitand_expr - | bitxor_expr '^' bitand_expr - { -- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); - } - - bitor_expr: - bitxor_expr - | bitor_expr '|' bitxor_expr - { -- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); - } - - logicand_expr: - bitor_expr - | logicand_expr OP_AND bitor_expr - { -- 
$$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); - } - - logicor_expr: - logicand_expr - | logicor_expr OP_OR logicand_expr - { -- $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); - } - - conditional_expr: - logicor_expr - | logicor_expr '?' expr ':' assignment_expr - { -- struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_list($3), *second = node_from_list($5); -+ struct hlsl_ir_node *cond = node_from_block($1); -+ struct hlsl_ir_node *first = node_from_block($3); -+ struct hlsl_ir_node *second = node_from_block($5); - struct hlsl_type *common_type; - -- list_move_tail($1, $3); -- list_move_tail($1, $5); -- vkd3d_free($3); -- vkd3d_free($5); -+ hlsl_block_add_block($1, $3); -+ hlsl_block_add_block($1, $5); -+ destroy_block($3); -+ destroy_block($5); - - if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) - YYABORT; - -- if (!(first = add_implicit_conversion(ctx, $1, first, common_type, &@3))) -+ if (!(first = add_implicit_conversion(ctx, block_to_list($1), first, common_type, &@3))) - YYABORT; - -- if (!(second = add_implicit_conversion(ctx, $1, second, common_type, &@5))) -+ if (!(second = add_implicit_conversion(ctx, block_to_list($1), second, common_type, &@5))) - YYABORT; - -- if (!hlsl_add_conditional(ctx, $1, cond, first, second)) -+ if (!hlsl_add_conditional(ctx, block_to_list($1), cond, first, second)) - YYABORT; - $$ = $1; - } -@@ -6264,16 +6471,16 @@ assignment_expr: - conditional_expr - | unary_expr assign_op assignment_expr - { -- struct hlsl_ir_node *lhs = node_from_list($1), *rhs = node_from_list($3); -+ struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3); - - if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) - { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const 
expression."); - YYABORT; - } -- list_move_tail($3, $1); -- vkd3d_free($1); -- if (!add_assignment(ctx, $3, lhs, $2, rhs)) -+ hlsl_block_add_block($3, $1); -+ destroy_block($1); -+ if (!add_assignment(ctx, block_to_list($3), lhs, $2, rhs)) - YYABORT; - $$ = $3; - } -@@ -6329,6 +6536,6 @@ expr: - | expr ',' assignment_expr - { - $$ = $1; -- list_move_tail($$, $3); -- vkd3d_free($3); -+ hlsl_block_add_block($$, $3); -+ destroy_block($3); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 765b1907426..09a3ea4ca08 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -97,6 +97,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str - static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *deref, const struct vkd3d_shader_location *loc) - { -+ enum hlsl_regset regset = hlsl_type_get_regset(deref->data_type); - struct hlsl_ir_node *offset = NULL; - struct hlsl_type *type; - unsigned int i; -@@ -111,7 +112,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st - struct hlsl_block idx_block; - - if (!(offset = new_offset_from_path_index(ctx, &idx_block, type, offset, deref->path[i].node, -- deref->offset_regset, loc))) -+ regset, loc))) - return NULL; - - hlsl_block_add_block(block, &idx_block); -@@ -126,7 +127,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st - static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, - struct hlsl_ir_node *instr) - { -- const struct hlsl_type *type; -+ struct hlsl_type *type; - struct hlsl_ir_node *offset; - struct hlsl_block block; - -@@ -145,7 +146,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der - return true; - } - -- deref->offset_regset = hlsl_type_get_regset(type); -+ 
deref->data_type = type; - - if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) - return false; -@@ -666,7 +667,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, - return; - list_add_after(&cf_instr->entry, &load->node.entry); - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc))) - return; - hlsl_block_add_instr(&then_block, jump); - -@@ -1689,7 +1690,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ - { - struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - -- if (!(load->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (!load->resource.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Loaded resource must have a single uniform source."); -@@ -1704,7 +1705,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ - - if (load->sampler.var) - { -- if (!(load->sampler.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (!load->sampler.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Resource load sampler must have a single uniform source."); -@@ -1722,7 +1723,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ - { - struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - -- if (!(store->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (!store->resource.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Accessed resource must have a single uniform source."); -@@ -1889,7 +1890,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - - if (rhs->type != HLSL_IR_LOAD) - { -- hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.\n"); -+ hlsl_fixme(ctx, &instr->loc, "Copying from 
unsupported node type."); - return false; - } - -@@ -2066,6 +2067,97 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir - return false; - } - -+/* Lower combined samples and sampler variables to synthesized separated textures and samplers. -+ * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ -+static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_resource_load *load; -+ struct vkd3d_string_buffer *name; -+ struct hlsl_ir_var *var; -+ unsigned int i; -+ -+ if (instr->type != HLSL_IR_RESOURCE_LOAD) -+ return false; -+ load = hlsl_ir_resource_load(instr); -+ -+ switch (load->load_type) -+ { -+ case HLSL_RESOURCE_LOAD: -+ case HLSL_RESOURCE_GATHER_RED: -+ case HLSL_RESOURCE_GATHER_GREEN: -+ case HLSL_RESOURCE_GATHER_BLUE: -+ case HLSL_RESOURCE_GATHER_ALPHA: -+ case HLSL_RESOURCE_SAMPLE_CMP: -+ case HLSL_RESOURCE_SAMPLE_CMP_LZ: -+ case HLSL_RESOURCE_SAMPLE_GRAD: -+ return false; -+ -+ case HLSL_RESOURCE_SAMPLE: -+ case HLSL_RESOURCE_SAMPLE_LOD: -+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -+ break; -+ } -+ if (load->sampler.var) -+ return false; -+ -+ if (!hlsl_type_is_resource(load->resource.var->data_type)) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Lower combined samplers within structs."); -+ return false; -+ } -+ -+ assert(hlsl_type_get_regset(load->resource.var->data_type) == HLSL_REGSET_SAMPLERS); -+ -+ if (!(name = hlsl_get_string_buffer(ctx))) -+ return false; -+ vkd3d_string_buffer_printf(name, "%s", load->resource.var->name); -+ -+ TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer)); -+ -+ if (!(var = hlsl_get_var(ctx->globals, name->buffer))) -+ { -+ struct hlsl_type *texture_array_type = hlsl_new_texture_type(ctx, load->sampling_dim, -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); -+ -+ /* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. 
*/ -+ struct hlsl_type *arr_type = load->resource.var->data_type; -+ for (i = 0; i < load->resource.path_len; ++i) -+ { -+ assert(arr_type->class == HLSL_CLASS_ARRAY); -+ texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); -+ arr_type = arr_type->e.array.type; -+ } -+ -+ if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, texture_array_type, &instr->loc, false))) -+ { -+ hlsl_release_string_buffer(ctx, name); -+ return false; -+ } -+ var->is_uniform = 1; -+ var->is_separated_resource = true; -+ -+ list_add_tail(&ctx->extern_vars, &var->extern_entry); -+ } -+ hlsl_release_string_buffer(ctx, name); -+ -+ if (load->sampling_dim != var->data_type->sampler_dim) -+ { -+ hlsl_error(ctx, &load->node.loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, -+ "Cannot split combined samplers from \"%s\" if they have different usage dimensions.", -+ load->resource.var->name); -+ hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, "First use as combined sampler is here."); -+ return false; -+ -+ } -+ -+ hlsl_copy_deref(ctx, &load->sampler, &load->resource); -+ load->resource.var = var; -+ assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); -+ assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); -+ -+ return true; -+} -+ - /* Lower DIV to RCP + MUL. 
*/ - static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -@@ -2584,6 +2676,61 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return true; - } - -+static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; -+ static const struct hlsl_constant_value zero_value; -+ struct hlsl_type *arg_type, *cmp_type; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; -+ struct hlsl_ir_jump *jump; -+ unsigned int i, count; -+ struct list instrs; -+ -+ if (instr->type != HLSL_IR_JUMP) -+ return false; -+ jump = hlsl_ir_jump(instr); -+ if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) -+ return false; -+ -+ list_init(&instrs); -+ -+ arg_type = jump->condition.node->data_type; -+ if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) -+ return false; -+ list_add_tail(&instrs, &zero->entry); -+ -+ operands[0] = jump->condition.node; -+ operands[1] = zero; -+ cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); -+ if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) -+ return false; -+ list_add_tail(&instrs, &cmp->entry); -+ -+ if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) -+ return false; -+ list_add_tail(&instrs, &bool_false->entry); -+ -+ or = bool_false; -+ -+ count = hlsl_type_component_count(cmp_type); -+ for (i = 0; i < count; ++i) -+ { -+ if (!(load = hlsl_add_load_component(ctx, &instrs, cmp, i, &instr->loc))) -+ return false; -+ -+ if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) -+ return NULL; -+ list_add_tail(&instrs, &or->entry); -+ } -+ -+ list_move_tail(&instr->entry, &instrs); -+ hlsl_src_remove(&jump->condition); -+ hlsl_src_from_node(&jump->condition, or); -+ jump->type = HLSL_IR_JUMP_DISCARD_NZ; -+ -+ 
return true; -+} -+ - static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - switch (instr->type) -@@ -2848,8 +2995,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - index->idx.node->last_read = last_read; - break; - } -- case HLSL_IR_CONSTANT: - case HLSL_IR_JUMP: -+ { -+ struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -+ -+ if (jump->condition.node) -+ jump->condition.node->last_read = last_read; -+ break; -+ } -+ case HLSL_IR_CONSTANT: - break; - } - } -@@ -3034,7 +3188,7 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct - return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); - } - --static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - struct hlsl_ir_resource_load *load; - struct hlsl_ir_var *var; -@@ -3046,15 +3200,16 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n - - load = hlsl_ir_resource_load(instr); - var = load->resource.var; -+ - regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); -+ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) -+ return false; - - if (regset == HLSL_REGSET_SAMPLERS) - { - enum hlsl_sampler_dim dim; - - assert(!load->sampler.var); -- if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) -- return false; - - dim = var->objects_usage[regset][index].sampler_dim; - if (dim != load->sampling_dim) -@@ -3072,25 +3227,37 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n - return false; - } - } -- var->objects_usage[regset][index].used = true; -- var->objects_usage[regset][index].sampler_dim = load->sampling_dim; - } -- else -- { -- if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, 
&index)) -- return false; -+ var->objects_usage[regset][index].sampler_dim = load->sampling_dim; - -- var->objects_usage[regset][index].used = true; -- var->objects_usage[regset][index].sampler_dim = load->sampling_dim; -+ return false; -+} - -- if (load->sampler.var) -- { -- var = load->sampler.var; -- if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) -- return false; -+static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_resource_load *load; -+ struct hlsl_ir_var *var; -+ enum hlsl_regset regset; -+ unsigned int index; - -- var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; -- } -+ if (instr->type != HLSL_IR_RESOURCE_LOAD) -+ return false; -+ -+ load = hlsl_ir_resource_load(instr); -+ var = load->resource.var; -+ -+ regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); -+ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) -+ return false; -+ -+ var->objects_usage[regset][index].used = true; -+ if (load->sampler.var) -+ { -+ var = load->sampler.var; -+ if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) -+ return false; -+ -+ var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; - } - - return false; -@@ -3110,9 +3277,12 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) - { - for (i = 0; i < type->reg_size[k]; ++i) - { -- /* Samplers are only allocated until the last used one. */ -+ bool is_separated = var->is_separated_resource; -+ -+ /* Samplers (and textures separated from them) are only allocated until the last -+ * used one. */ - if (var->objects_usage[k][i].used) -- var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; -+ var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS || is_separated) ? 
i + 1 : type->reg_size[k]; - } - } - } -@@ -3192,10 +3362,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, - } - } - -+static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f) -+{ -+ struct hlsl_constant_defs *defs = &ctx->constant_defs; -+ struct hlsl_constant_register *reg; -+ size_t i; -+ -+ for (i = 0; i < defs->count; ++i) -+ { -+ reg = &defs->regs[i]; -+ if (reg->index == (component_index / 4)) -+ { -+ reg->value.f[component_index % 4] = f; -+ return; -+ } -+ } -+ -+ if (!hlsl_array_reserve(ctx, (void **)&defs->regs, &defs->size, defs->count + 1, sizeof(*defs->regs))) -+ return; -+ reg = &defs->regs[defs->count++]; -+ memset(reg, 0, sizeof(*reg)); -+ reg->index = component_index / 4; -+ reg->value.f[component_index % 4] = f; -+} -+ - static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - struct hlsl_block *block, struct register_allocator *allocator) - { -- struct hlsl_constant_defs *defs = &ctx->constant_defs; - struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -@@ -3206,66 +3399,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - { - struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - const struct hlsl_type *type = instr->data_type; -- unsigned int x, y, i, writemask, end_reg; -- unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int x, i; - - constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); - -- if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, -- constant->reg.id + reg_size / 4, sizeof(*defs->values))) -- return; -- end_reg = constant->reg.id + reg_size / 4; -- if (end_reg > defs->count) -- { -- memset(&defs->values[defs->count], 0, sizeof(*defs->values) * (end_reg - defs->count)); -- defs->count = end_reg; -- } -- - 
assert(type->class <= HLSL_CLASS_LAST_NUMERIC); -+ assert(type->dimy == 1); -+ assert(constant->reg.writemask); - -- if (!(writemask = constant->reg.writemask)) -- writemask = (1u << type->dimx) - 1; -- -- for (y = 0; y < type->dimy; ++y) -+ for (x = 0, i = 0; x < 4; ++x) - { -- for (x = 0, i = 0; x < 4; ++x) -+ const union hlsl_constant_value_component *value; -+ float f; -+ -+ if (!(constant->reg.writemask & (1u << x))) -+ continue; -+ value = &constant->value.u[i++]; -+ -+ switch (type->base_type) - { -- const union hlsl_constant_value_component *value; -- float f; -- -- if (!(writemask & (1u << x))) -- continue; -- value = &constant->value.u[i++]; -- -- switch (type->base_type) -- { -- case HLSL_TYPE_BOOL: -- f = !!value->u; -- break; -- -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- f = value->f; -- break; -- -- case HLSL_TYPE_INT: -- f = value->i; -- break; -- -- case HLSL_TYPE_UINT: -- f = value->u; -- break; -- -- case HLSL_TYPE_DOUBLE: -- FIXME("Double constant.\n"); -- return; -- -- default: -- vkd3d_unreachable(); -- } -- defs->values[constant->reg.id + y].f[x] = f; -+ case HLSL_TYPE_BOOL: -+ f = !!value->u; -+ break; -+ -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ f = value->f; -+ break; -+ -+ case HLSL_TYPE_INT: -+ f = value->i; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ f = value->u; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ FIXME("Double constant.\n"); -+ return; -+ -+ default: -+ vkd3d_unreachable(); - } -+ -+ record_constant(ctx, constant->reg.id * 4 + x, f); - } - - break; -@@ -3297,8 +3476,6 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - struct register_allocator allocator = {0}; - struct hlsl_ir_var *var; - -- allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); -- - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->last_read) -@@ -3315,6 +3492,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct 
hlsl_ir_functi - } - } - -+ allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); -+ - vkd3d_free(allocator.allocations); - } - -@@ -3497,7 +3676,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) - - LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) -+ if (!var1->is_uniform || hlsl_type_is_resource(var1->data_type)) - continue; - - buffer = var1->buffer; -@@ -3508,7 +3687,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) - { - unsigned int var1_reg_size, var2_reg_size; - -- if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) -+ if (!var2->is_uniform || hlsl_type_is_resource(var2->data_type)) - continue; - - if (var1 == var2 || var1->buffer != var2->buffer) -@@ -3558,7 +3737,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) -+ if (var->is_uniform && !hlsl_type_is_resource(var->data_type)) - { - if (var->is_param) - var->buffer = ctx->params_buffer; -@@ -3618,7 +3797,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - } - - static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, -- uint32_t index) -+ uint32_t index, bool allocated_only) - { - const struct hlsl_ir_var *var; - unsigned int start, count; -@@ -3632,6 +3811,9 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - * bound there even if the reserved vars aren't used. 
*/ - start = var->reg_reservation.reg_index; - count = var->data_type->reg_size[regset]; -+ -+ if (!var->regs[regset].allocated && allocated_only) -+ continue; - } - else if (var->regs[regset].allocated) - { -@@ -3672,6 +3854,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - if (count == 0) - continue; - -+ /* The variable was already allocated if it has a reservation. */ - if (var->regs[regset].allocated) - { - const struct hlsl_ir_var *reserved_object, *last_reported = NULL; -@@ -3690,7 +3873,10 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - { - index = var->regs[regset].id + i; - -- reserved_object = get_allocated_object(ctx, regset, index); -+ /* get_allocated_object() may return "var" itself, but we -+ * actually want that, otherwise we'll end up reporting the -+ * same conflict between the same two variables twice. */ -+ reserved_object = get_allocated_object(ctx, regset, index, true); - if (reserved_object && reserved_object != var && reserved_object != last_reported) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, -@@ -3709,7 +3895,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - - while (available < count) - { -- if (get_allocated_object(ctx, regset, index)) -+ if (get_allocated_object(ctx, regset, index, false)) - available = 0; - else - ++available; -@@ -3853,6 +4039,7 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref - bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) - { - struct hlsl_ir_node *offset_node = deref->offset.node; -+ enum hlsl_regset regset; - unsigned int size; - - if (!offset_node) -@@ -3869,8 +4056,9 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref - return false; - - *offset = hlsl_ir_constant(offset_node)->value.u[0].u; -+ regset = hlsl_type_get_regset(deref->data_type); - -- size 
= deref->var->data_type->reg_size[deref->offset_regset]; -+ size = deref->var->data_type->reg_size[regset]; - if (*offset >= size) - { - hlsl_error(ctx, &deref->offset.node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -@@ -3900,7 +4088,8 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - -- assert(deref->offset_regset == HLSL_REGSET_NUMERIC); -+ assert(deref->data_type); -+ assert(deref->data_type->class <= HLSL_CLASS_LAST_NUMERIC); - - ret.id += offset / 4; - -@@ -4062,6 +4251,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, - "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); - -+ if (profile->major_version >= 4) -+ { -+ hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); -+ } - hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); - while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); - do -@@ -4094,6 +4287,12 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); - hlsl_transform_ir(ctx, lower_int_dot, body, NULL); - -+ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); -+ hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); -+ if (profile->major_version >= 4) -+ hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); -+ hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); -+ - if (profile->major_version < 4) - { - hlsl_transform_ir(ctx, lower_division, body, NULL); -@@ -4107,9 +4306,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, lower_abs, body, NULL); - } - -- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); 
-- hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); -- - /* TODO: move forward, remove when no longer needed */ - transform_derefs(ctx, replace_deref_path_with_offset, body); - while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 301113c8477..01c438ae212 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -80,7 +80,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - return false; - } - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (src->node.data_type->base_type) - { -@@ -160,7 +160,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - assert(type == src->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { -@@ -186,6 +186,51 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - return true; - } - -+static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src->node.data_type->base_type); -+ -+ for (k = 0; k < 4; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (ctx->profile->major_version >= 4 && src->value.u[k].f == 0.0f) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, -+ "Floating point division by zero."); -+ } -+ dst->u[k].f = 1.0f / src->value.u[k].f; -+ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, -+ "Infinities and NaNs are not allowed by the shader model."); -+ } -+ 
break; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (src->value.u[k].d == 0.0) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, -+ "Floating point division by zero."); -+ } -+ dst->u[k].d = 1.0 / src->value.u[k].d; -+ break; -+ -+ default: -+ FIXME("Fold 'rcp' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -@@ -195,7 +240,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { -@@ -223,7 +268,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - return true; - } - --static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { - enum hlsl_base_type type = dst_type->base_type; -@@ -232,64 +277,73 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; -- break; -- -- case HLSL_TYPE_DOUBLE: -- dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; -- break; -- - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; -+ case HLSL_TYPE_BOOL: -+ 
dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; - break; - - default: -- FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ FIXME("Fold bit/logic and for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (type) - { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; - break; - -- case HLSL_TYPE_DOUBLE: -- dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; -- break; -+ default: -+ FIXME("Fold bit/logic or for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ return true; -+} -+ -+static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); - -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: 
-- case HLSL_TYPE_BOOL: -- dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; -+ dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; - break; - - default: -- vkd3d_unreachable(); -+ FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; - } -- -- dst->u[k].u *= ~0u; - } - return true; - } -@@ -363,45 +417,116 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - return true; - } - --static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -- const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, -- const struct vkd3d_shader_location *loc) -+static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (type) -+ switch (src1->node.data_type->base_type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f == src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d == src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: -- if (src2->value.u[k].i == 0) -- { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -- return false; -- } -- if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) -- dst->u[k].i = 0; -- else -- dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } 
-+ -+ dst->u[k].u *= ~0u; -+ } -+ return true; -+} -+ -+static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ unsigned int k; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (src1->node.data_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f >= src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d >= src2->value.u[k].d; -+ break; -+ -+ case HLSL_TYPE_INT: -+ dst->u[k].u = src1->value.u[k].i >= src2->value.u[k].i; - break; - - case HLSL_TYPE_UINT: -- if (src2->value.u[k].u == 0) -- { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -- return false; -- } -- dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; - break; - - default: -- FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); -- return false; -+ vkd3d_unreachable(); - } -+ -+ dst->u[k].u *= ~0u; -+ } -+ return true; -+} -+ -+static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ unsigned int k; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (src1->node.data_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f < src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d < src2->value.u[k].d; -+ break; -+ -+ case HLSL_TYPE_INT: -+ 
dst->u[k].u = src1->value.u[k].i < src2->value.u[k].i; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ dst->u[k].u *= ~0u; - } - return true; - } -@@ -419,6 +544,15 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = fmaxf(src1->value.u[k].f, src2->value.u[k].f); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = fmax(src1->value.u[k].d, src2->value.u[k].d); -+ break; -+ - case HLSL_TYPE_INT: - dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); - break; -@@ -448,6 +582,15 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = fminf(src1->value.u[k].f, src2->value.u[k].f); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = fmin(src1->value.u[k].d, src2->value.u[k].d); -+ break; -+ - case HLSL_TYPE_INT: - dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); - break; -@@ -464,8 +607,9 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - return true; - } - --static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -- const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, -+ const struct vkd3d_shader_location *loc) - { - enum hlsl_base_type type = dst_type->base_type; - unsigned int k; -@@ -478,19 +622,35 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - switch (type) - { - case HLSL_TYPE_INT: -+ if (src2->value.u[k].i == 0) -+ { -+ hlsl_error(ctx, loc, 
VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -+ return false; -+ } -+ if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) -+ dst->u[k].i = 0; -+ else -+ dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; -+ break; -+ - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; -+ if (src2->value.u[k].u == 0) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -+ return false; -+ } -+ dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { - enum hlsl_base_type type = dst_type->base_type; -@@ -503,41 +663,60 @@ static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; -+ dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_nequal(struct hlsl_ctx *ctx, 
struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (type) -+ switch (src1->node.data_type->base_type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); -- return false; -+ vkd3d_unreachable(); - } -+ -+ dst->u[k].u *= ~0u; - } - return true; - } -@@ -587,24 +766,42 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - success = fold_neg(ctx, &res, instr->data_type, arg1); - break; - -+ case HLSL_OP1_RCP: -+ success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); -+ break; -+ - case HLSL_OP2_ADD: - success = fold_add(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_MUL: -- success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_BIT_AND: -+ case HLSL_OP2_LOGIC_AND: -+ success = fold_and(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_NEQUAL: -- success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_BIT_OR: -+ case HLSL_OP2_LOGIC_OR: -+ success = fold_or(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_BIT_XOR: -+ success = 
fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); - break; - - case HLSL_OP2_DIV: - success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); - break; - -- case HLSL_OP2_MOD: -- success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); -+ case HLSL_OP2_EQUAL: -+ success = fold_equal(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_GEQUAL: -+ success = fold_gequal(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_LESS: -+ success = fold_less(ctx, &res, instr->data_type, arg1, arg2); - break; - - case HLSL_OP2_MAX: -@@ -615,16 +812,16 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - success = fold_min(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_BIT_XOR: -- success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_MOD: -+ success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); - break; - -- case HLSL_OP2_BIT_AND: -- success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_MUL: -+ success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_BIT_OR: -- success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_NEQUAL: -+ success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); - break; - - default: -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 3542b5fac51..9725a5c7e25 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -3217,7 +3217,7 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil - { - assert(reg->idx[0].offset < compiler->temp_count); - register_info->id = compiler->temp_id + reg->idx[0].offset; -- register_info->storage_class = SpvStorageClassFunction; -+ register_info->storage_class = SpvStorageClassPrivate; - register_info->descriptor_array = NULL; - 
register_info->member_idx = 0; - register_info->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -@@ -3998,6 +3998,11 @@ static void spirv_compiler_emit_interpolation_decorations(struct spirv_compiler - vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); - vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); - break; -+ case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: -+ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationNoPerspective, NULL, 0); -+ vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); -+ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); -+ break; - default: - FIXME("Unhandled interpolation mode %#x.\n", mode); - break; -@@ -5258,8 +5263,7 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler - WARN("Unhandled global flags %#x.\n", flags); - } - --static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t count) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - size_t function_location; -@@ -5270,11 +5274,11 @@ static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, - vkd3d_spirv_begin_function_stream_insertion(builder, function_location); - - assert(!compiler->temp_count); -- compiler->temp_count = instruction->declaration.count; -+ compiler->temp_count = count; - for (i = 0; i < compiler->temp_count; ++i) - { -- id = spirv_compiler_emit_variable(compiler, &builder->function_stream, -- SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); -+ id = spirv_compiler_emit_variable(compiler, &builder->global_stream, -+ SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); - if (!i) - compiler->temp_id = id; - assert(id == compiler->temp_id + i); -@@ -6236,9 +6240,6 @@ static void spirv_compiler_leave_shader_phase(struct 
spirv_compiler *compiler) - - vkd3d_spirv_build_op_function_end(builder); - -- compiler->temp_id = 0; -- compiler->temp_count = 0; -- - if (is_in_control_point_phase(compiler)) - { - if (compiler->epilogue_function_id) -@@ -9103,9 +9104,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_GLOBAL_FLAGS: - spirv_compiler_emit_dcl_global_flags(compiler, instruction); - break; -- case VKD3DSIH_DCL_TEMPS: -- spirv_compiler_emit_dcl_temps(compiler, instruction); -- break; - case VKD3DSIH_DCL_INDEXABLE_TEMP: - spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); - break; -@@ -9426,6 +9424,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - spirv_compiler_emit_cut_stream(compiler, instruction); - break; - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: -+ case VKD3DSIH_DCL_TEMPS: - case VKD3DSIH_HS_DECLS: - case VKD3DSIH_NOP: - /* nothing to do */ -@@ -9448,6 +9447,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - enum vkd3d_result result = VKD3D_OK; - unsigned int i; - -+ if (parser->shader_desc.temp_count) -+ spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); -+ - compiler->location.column = 0; - compiler->location.line = 1; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index d066b13ee4e..801c688a297 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -989,6 +989,8 @@ static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *i - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) - { - ins->declaration.count = *tokens; -+ if (opcode == VKD3D_SM4_OP_DCL_TEMPS) -+ priv->p.shader_desc.temp_count = max(priv->p.shader_desc.temp_count, *tokens); - } - - static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -2442,6 +2444,7 @@ int 
vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - } - - shader_desc = &sm4->p.shader_desc; -+ shader_desc->is_dxil = false; - if ((ret = shader_extract_from_dxbc(&compile_info->source, - message_context, compile_info->source_name, shader_desc)) < 0) - { -@@ -2967,47 +2970,154 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ - } - } - -+struct extern_resource -+{ -+ /* var is only not NULL if this resource is a whole variable, so it may be responsible for more -+ * than one component. */ -+ const struct hlsl_ir_var *var; -+ -+ char *name; -+ struct hlsl_type *data_type; -+ bool is_user_packed; -+ -+ enum hlsl_regset regset; -+ unsigned int id, bind_count; -+}; -+ - static int sm4_compare_extern_resources(const void *a, const void *b) - { -- const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; -- const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; -- enum hlsl_regset aa_regset, bb_regset; -+ const struct extern_resource *aa = (const struct extern_resource *)a; -+ const struct extern_resource *bb = (const struct extern_resource *)b; -+ int r; - -- aa_regset = hlsl_type_get_regset(aa->data_type); -- bb_regset = hlsl_type_get_regset(bb->data_type); -+ if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) -+ return r; - -- if (aa_regset != bb_regset) -- return aa_regset - bb_regset; -+ return vkd3d_u32_compare(aa->id, bb->id); -+} -+ -+static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) -+{ -+ unsigned int i; - -- return aa->regs[aa_regset].id - bb->regs[bb_regset].id; -+ for (i = 0; i < count; ++i) -+ vkd3d_free(extern_resources[i].name); -+ vkd3d_free(extern_resources); - } - --static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -+static const char *string_skip_tag(const char *string) - { -- const struct hlsl_ir_var **extern_resources = NULL; -+ if (!strncmp(string, "", strlen(""))) -+ 
return string + strlen(""); -+ return string; -+} -+ -+static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -+{ -+ bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; -+ struct extern_resource *extern_resources = NULL; - const struct hlsl_ir_var *var; - enum hlsl_regset regset; - size_t capacity = 0; -+ char *name; - - *count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (!hlsl_type_is_resource(var->data_type)) -- continue; -- regset = hlsl_type_get_regset(var->data_type); -- if (!var->regs[regset].allocated) -- continue; -- -- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -- sizeof(*extern_resources)))) -+ if (separate_components) - { -- *count = 0; -- return NULL; -+ unsigned int component_count = hlsl_type_component_count(var->data_type); -+ unsigned int k, regset_offset; -+ -+ for (k = 0; k < component_count; ++k) -+ { -+ struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); -+ struct vkd3d_string_buffer *name_buffer; -+ -+ if (!hlsl_type_is_resource(component_type)) -+ continue; -+ -+ regset = hlsl_type_get_regset(component_type); -+ regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, regset, k); -+ -+ if (regset_offset > var->regs[regset].bind_count) -+ continue; -+ -+ if (var->objects_usage[regset][regset_offset].used) -+ { -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -+ sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count 
= 0; -+ hlsl_release_string_buffer(ctx, name_buffer); -+ return NULL; -+ } -+ hlsl_release_string_buffer(ctx, name_buffer); -+ -+ extern_resources[*count].var = NULL; -+ -+ extern_resources[*count].name = name; -+ extern_resources[*count].data_type = component_type; -+ extern_resources[*count].is_user_packed = false; -+ -+ extern_resources[*count].regset = regset; -+ extern_resources[*count].id = var->regs[regset].id + regset_offset; -+ extern_resources[*count].bind_count = 1; -+ -+ ++*count; -+ } -+ } - } -+ else -+ { -+ if (!hlsl_type_is_resource(var->data_type)) -+ continue; -+ regset = hlsl_type_get_regset(var->data_type); -+ if (!var->regs[regset].allocated) -+ continue; -+ -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -+ sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ extern_resources[*count].var = var; - -- extern_resources[*count] = var; -- ++*count; -+ extern_resources[*count].name = name; -+ extern_resources[*count].data_type = var->data_type; -+ extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; -+ -+ extern_resources[*count].regset = regset; -+ extern_resources[*count].id = var->regs[regset].id; -+ extern_resources[*count].bind_count = var->regs[regset].bind_count; -+ -+ ++*count; -+ } - } - - qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); -@@ -3020,8 +3130,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; - size_t cbuffer_position, resource_position, creator_position; - const struct hlsl_profile_info *profile = ctx->profile; -- const struct hlsl_ir_var **extern_resources; - struct vkd3d_bytecode_buffer 
buffer = {0}; -+ struct extern_resource *extern_resources; - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; - -@@ -3075,18 +3185,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - for (i = 0; i < extern_resources_count; ++i) - { -- enum hlsl_regset regset; -+ const struct extern_resource *resource = &extern_resources[i]; - uint32_t flags = 0; - -- var = extern_resources[i]; -- regset = hlsl_type_get_regset(var->data_type); -- -- if (var->reg_reservation.reg_type) -+ if (resource->is_user_packed) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, sm4_resource_type(var->data_type)); -- if (regset == HLSL_REGSET_SAMPLERS) -+ put_u32(&buffer, sm4_resource_type(resource->data_type)); -+ if (resource->regset == HLSL_REGSET_SAMPLERS) - { - put_u32(&buffer, 0); - put_u32(&buffer, 0); -@@ -3094,15 +3201,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - } - else - { -- unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; -+ unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource_format->dimx; - -- put_u32(&buffer, sm4_resource_format(var->data_type)); -- put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); -+ put_u32(&buffer, sm4_resource_format(resource->data_type)); -+ put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); - put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; - } -- put_u32(&buffer, var->regs[regset].id); -- put_u32(&buffer, var->regs[regset].bind_count); -+ put_u32(&buffer, resource->id); -+ put_u32(&buffer, resource->bind_count); - put_u32(&buffer, flags); - } - -@@ -3128,9 +3235,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - for (i = 0; i < extern_resources_count; ++i) - { -- var = extern_resources[i]; -+ const struct 
extern_resource *resource = &extern_resources[i]; - -- string_offset = put_string(&buffer, var->name); -+ string_offset = put_string(&buffer, resource->name); - set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); - } - -@@ -3236,7 +3343,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - add_section(dxbc, TAG_RDEF, &buffer); - -- vkd3d_free(extern_resources); -+ sm4_free_extern_resources(extern_resources, extern_resources_count); - } - - static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) -@@ -3346,8 +3453,9 @@ struct sm4_instruction - - static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, - unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, -- const struct hlsl_deref *deref, const struct hlsl_type *data_type) -+ const struct hlsl_deref *deref) - { -+ const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); - const struct hlsl_ir_var *var = deref->var; - - if (var->is_uniform) -@@ -3362,7 +3470,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; - reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -- assert(deref->offset_regset == HLSL_REGSET_TEXTURES); -+ assert(regset == HLSL_REGSET_TEXTURES); - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } -@@ -3374,7 +3482,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; - reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -- assert(deref->offset_regset == HLSL_REGSET_UAVS); -+ assert(regset == HLSL_REGSET_UAVS); - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } -@@ -3386,7 +3494,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - *swizzle_type = 
VKD3D_SM4_SWIZZLE_NONE; - reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; - reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -- assert(deref->offset_regset == HLSL_REGSET_SAMPLERS); -+ assert(regset == HLSL_REGSET_SAMPLERS); - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } -@@ -3484,11 +3592,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - } - - static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, -- const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) -+ const struct hlsl_deref *deref, unsigned int map_writemask) - { - unsigned int writemask; - -- sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); -+ sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref); - if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) - src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); - } -@@ -3689,9 +3797,11 @@ static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, - write_sm4_instruction(buffer, &instr); - } - --static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -+static void write_sm4_dcl_samplers(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct extern_resource *resource) - { -- unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; -+ struct hlsl_type *component_type; -+ unsigned int i; - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, -@@ -3701,38 +3811,44 @@ static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const s - .dst_count = 1, - }; - -- if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) -+ component_type = hlsl_type_get_component_type(ctx, resource->data_type, 0); -+ -+ if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) - instr.opcode |= 
VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; - -- for (i = 0; i < count; ++i) -+ assert(resource->regset == HLSL_REGSET_SAMPLERS); -+ -+ for (i = 0; i < resource->bind_count; ++i) - { -- if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) -+ if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - continue; - -- instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id + i; -+ instr.dsts[0].reg.idx[0] = resource->id + i; - write_sm4_instruction(buffer, &instr); - } - } - - static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_var *var, bool uav) -+ const struct extern_resource *resource, bool uav) - { - enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; -- unsigned int i, count = var->data_type->reg_size[regset]; - struct hlsl_type *component_type; - struct sm4_instruction instr; -+ unsigned int i; - -- component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); -+ assert(resource->regset == regset); - -- for (i = 0; i < count; ++i) -+ component_type = hlsl_type_get_component_type(ctx, resource->data_type, 0); -+ -+ for (i = 0; i < resource->bind_count; ++i) - { -- if (!var->objects_usage[regset][i].used) -+ if (resource->var && !resource->var->objects_usage[regset][i].used) - continue; - - instr = (struct sm4_instruction) - { - .dsts[0].reg.type = uav ? 
VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, -- .dsts[0].reg.idx = {var->regs[regset].id + i}, -+ .dsts[0].reg.idx = {resource->id + i}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - -@@ -3742,11 +3858,11 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - - if (uav) - { -- switch (var->data_type->sampler_dim) -+ switch (resource->data_type->sampler_dim) - { - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; -- instr.byte_stride = var->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; -+ instr.byte_stride = resource->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; - break; - default: - instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; -@@ -4008,11 +4124,11 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff - } - - static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, -- const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, -- const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, -- enum hlsl_sampler_dim dim) -+ const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, -+ const struct hlsl_ir_node *coords, const struct hlsl_ir_node *sample_index, -+ const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) - { -+ const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, resource); - bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE - && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); - bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); -@@ -4052,7 +4168,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - - sm4_src_from_node(&instr.srcs[0], coords, coords_writemask); - -- sm4_src_from_deref(ctx, &instr.srcs[1], resource, 
resource_type, instr.dsts[0].writemask); -+ sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); - - instr.src_count = 2; - -@@ -4089,7 +4205,6 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_resource_load *load) - { -- const struct hlsl_type *resource_type = load->resource.var->data_type; - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *coords = load->coords.node; - const struct hlsl_deref *resource = &load->resource; -@@ -4142,8 +4257,8 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); -- sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); -+ sm4_src_from_deref(ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 3; - - if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD -@@ -4313,7 +4428,7 @@ static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecod - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; - -- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); -+ sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -@@ -4369,11 +4484,31 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); - break; - -+ case HLSL_OP1_DSX_COARSE: -+ assert(type_is_float(dst_type)); -+ 
write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_DSX_FINE: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); -+ break; -+ - case HLSL_OP1_DSY: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); - break; - -+ case HLSL_OP1_DSY_COARSE: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_DSY_FINE: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); -+ break; -+ - case HLSL_OP1_EXP2: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); -@@ -4780,19 +4915,13 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, - instr.opcode = VKD3D_SM4_OP_BREAK; - break; - -- case HLSL_IR_JUMP_DISCARD: -+ case HLSL_IR_JUMP_DISCARD_NZ: - { -- struct sm4_register *reg = &instr.srcs[0].reg; -- - instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; - - memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); -- instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - instr.src_count = 1; -- reg->type = VKD3D_SM4_RT_IMMCONST; -- reg->dim = VKD3D_SM4_DIMENSION_SCALAR; -- reg->immconst_uint[0] = ~0u; -- -+ sm4_src_from_node(&instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); - break; - } - -@@ -4800,7 +4929,7 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, - vkd3d_unreachable(); - - default: -- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); -+ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); - return; - } - -@@ -4839,7 +4968,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, - - instr.opcode = VKD3D_SM4_OP_MOVC; - -- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, 
instr.dsts[0].writemask); -+ sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); - - memset(&value, 0xff, sizeof(value)); - sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); -@@ -4851,7 +4980,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, - { - instr.opcode = VKD3D_SM4_OP_MOV; - -- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); -+ sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); - instr.src_count = 1; - } - -@@ -4875,8 +5004,7 @@ static void write_sm4_loop(struct hlsl_ctx *ctx, - } - - static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, -- const struct hlsl_deref *resource, const struct hlsl_deref *sampler, -+ const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) - { - struct sm4_src_register *src; -@@ -4906,10 +5034,10 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - } - } - -- sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); -+ sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask); - - src = &instr.srcs[instr.src_count++]; -- sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); - src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; - src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; - src->swizzle = swizzle; -@@ -4920,34 +5048,16 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - static void write_sm4_resource_load(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) - { -- const 
struct hlsl_type *resource_type = load->resource.var->data_type; - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *sample_index = load->sample_index.node; - const struct hlsl_ir_node *coords = load->coords.node; - -- if (!hlsl_type_is_resource(resource_type)) -+ if (load->sampler.var && !load->sampler.var->is_uniform) - { -- hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); -+ hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); - return; - } - -- if (load->sampler.var) -- { -- const struct hlsl_type *sampler_type = load->sampler.var->data_type; -- -- if (!hlsl_type_is_resource(sampler_type)) -- { -- hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); -- return; -- } -- -- if (!load->sampler.var->is_uniform) -- { -- hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); -- return; -- } -- } -- - if (!load->resource.var->is_uniform) - { - hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); -@@ -4957,7 +5067,7 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, - switch (load->load_type) - { - case HLSL_RESOURCE_LOAD: -- write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, -+ write_sm4_ld(ctx, buffer, &load->node, &load->resource, - coords, sample_index, texel_offset, load->sampling_dim); - break; - -@@ -4967,32 +5077,29 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, - case HLSL_RESOURCE_SAMPLE_LOD: - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - case HLSL_RESOURCE_SAMPLE_GRAD: -- if (!load->sampler.var) -- { -- hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); -- return; -- } -+ /* Combined sample expressions were lowered. 
*/ -+ assert(load->sampler.var); - write_sm4_sample(ctx, buffer, load); - break; - - case HLSL_RESOURCE_GATHER_RED: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); -+ write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(X, X, X, X), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_GREEN: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); -+ write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_BLUE: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); -+ write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_ALPHA: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); -+ write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(W, W, W, W), texel_offset); - break; - } - } -@@ -5000,13 +5107,7 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, - static void write_sm4_resource_store(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) - { -- const struct hlsl_type *resource_type = store->resource.var->data_type; -- -- if (!hlsl_type_is_resource(resource_type)) -- { -- hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); -- return; -- } -+ struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); - - if (!store->resource.var->is_uniform) - { -@@ -5016,7 +5117,7 @@ static void 
write_sm4_resource_store(struct hlsl_ctx *ctx, - - if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { -- hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); -+ hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); - return; - } - -@@ -5033,7 +5134,7 @@ static void write_sm4_store(struct hlsl_ctx *ctx, - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - -- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); -+ sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); - instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); - instr.dst_count = 1; - -@@ -5144,8 +5245,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) - { - const struct hlsl_profile_info *profile = ctx->profile; -- const struct hlsl_ir_var **extern_resources; - struct vkd3d_bytecode_buffer buffer = {0}; -+ struct extern_resource *extern_resources; - unsigned int extern_resources_count, i; - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; -@@ -5177,17 +5278,14 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - - for (i = 0; i < extern_resources_count; ++i) - { -- enum hlsl_regset regset; -+ const struct extern_resource *resource = &extern_resources[i]; - -- var = extern_resources[i]; -- regset = hlsl_type_get_regset(var->data_type); -- -- if (regset == HLSL_REGSET_SAMPLERS) -- write_sm4_dcl_samplers(&buffer, var); -- else if (regset == HLSL_REGSET_TEXTURES) -- write_sm4_dcl_textures(ctx, &buffer, var, false); -- else if (regset == HLSL_REGSET_UAVS) -- write_sm4_dcl_textures(ctx, &buffer, var, true); -+ if (resource->regset == HLSL_REGSET_SAMPLERS) -+ write_sm4_dcl_samplers(ctx, &buffer, resource); -+ else if (resource->regset == HLSL_REGSET_TEXTURES) -+ write_sm4_dcl_textures(ctx, &buffer, resource, false); 
-+ else if (resource->regset == HLSL_REGSET_UAVS) -+ write_sm4_dcl_textures(ctx, &buffer, resource, true); - } - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -@@ -5210,7 +5308,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - - add_section(dxbc, TAG_SHDR, &buffer); - -- vkd3d_free(extern_resources); -+ sm4_free_extern_resources(extern_resources, extern_resources_count); - } - - int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 343fdb2252e..33d8c60e59a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -415,6 +415,8 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t - return "hlsl"; - case VKD3D_SHADER_SOURCE_D3D_BYTECODE: - return "d3dbc"; -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ return "dxil"; - default: - FIXME("Unhandled source type %#x.\n", type); - return "bin"; -@@ -440,6 +442,18 @@ void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type source_type, - shader_get_source_type_suffix(source_type), shader->code, shader->size); - } - -+static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) -+{ -+ struct vkd3d_shader_scan_signature_info *signature_info; -+ -+ if ((signature_info = vkd3d_find_struct(info->next, SCAN_SIGNATURE_INFO))) -+ { -+ memset(&signature_info->input, 0, sizeof(signature_info->input)); -+ memset(&signature_info->output, 0, sizeof(signature_info->output)); -+ memset(&signature_info->patch_constant, 0, sizeof(signature_info->patch_constant)); -+ } -+} -+ - bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, - struct vkd3d_shader_message_context *message_context, const char *source_name, - const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, 
-@@ -526,6 +540,43 @@ void vkd3d_shader_free_messages(char *messages) - vkd3d_free(messages); - } - -+static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, -+ const struct shader_signature *src) -+{ -+ unsigned int i; -+ -+ signature->element_count = src->element_count; -+ if (!src->elements) -+ { -+ assert(!signature->element_count); -+ signature->elements = NULL; -+ return true; -+ } -+ -+ if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) -+ return false; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ struct vkd3d_shader_signature_element *d = &signature->elements[i]; -+ struct signature_element *e = &src->elements[i]; -+ -+ d->semantic_name = e->semantic_name; -+ d->semantic_index = e->semantic_index; -+ d->stream_index = e->stream_index; -+ d->sysval_semantic = e->sysval_semantic; -+ d->component_type = e->component_type; -+ d->register_index = e->register_index; -+ if (e->register_count > 1) -+ FIXME("Arrayed elements are not supported yet.\n"); -+ d->mask = e->mask; -+ d->used_mask = e->used_mask; -+ d->min_precision = e->min_precision; -+ } -+ -+ return true; -+} -+ - struct vkd3d_shader_scan_context - { - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -@@ -1070,6 +1121,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) - { - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -+ struct vkd3d_shader_scan_signature_info *signature_info; - struct vkd3d_shader_instruction *instruction; - struct vkd3d_shader_scan_context context; - int ret = VKD3D_OK; -@@ -1080,6 +1132,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - scan_descriptor_info->descriptors = NULL; - scan_descriptor_info->descriptor_count = 0; - } -+ signature_info = vkd3d_find_struct(compile_info->next, 
SCAN_SIGNATURE_INFO); - - vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); - -@@ -1099,6 +1152,21 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - } - } - -+ if (!ret && signature_info) -+ { -+ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) -+ || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, -+ &parser->shader_desc.output_signature) -+ || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, -+ &parser->shader_desc.patch_constant_signature)) -+ { -+ vkd3d_shader_free_scan_signature_info(signature_info); -+ if (scan_descriptor_info) -+ vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ } -+ - vkd3d_shader_scan_context_cleanup(&context); - return ret; - } -@@ -1139,6 +1207,24 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, - return ret; - } - -+static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_shader_parser *parser; -+ int ret; -+ -+ if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) -+ { -+ WARN("Failed to initialise shader parser.\n"); -+ return ret; -+ } -+ -+ ret = scan_with_parser(compile_info, message_context, parser); -+ vkd3d_shader_parser_destroy(parser); -+ -+ return ret; -+} -+ - int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) - { - struct vkd3d_shader_message_context message_context; -@@ -1152,6 +1238,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - if ((ret = vkd3d_shader_validate_compile_info(compile_info, false)) < 0) - return ret; - -+ init_scan_signature_info(compile_info); -+ - vkd3d_shader_message_context_init(&message_context, 
compile_info->log_level); - - switch (compile_info->source_type) -@@ -1169,6 +1257,10 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - ret = scan_d3dbc(compile_info, &message_context); - break; - -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = scan_dxil(compile_info, &message_context); -+ break; -+ - default: - ERR("Unsupported source type %#x.\n", compile_info->source_type); - ret = VKD3D_ERROR_INVALID_ARGUMENT; -@@ -1291,6 +1383,24 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ - return VKD3D_ERROR; - } - -+static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_shader_parser *parser; -+ int ret; -+ -+ if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) -+ { -+ WARN("Failed to initialise shader parser.\n"); -+ return ret; -+ } -+ -+ ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); -+ -+ vkd3d_shader_parser_destroy(parser); -+ return ret; -+} -+ - int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, char **messages) - { -@@ -1305,6 +1415,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - if ((ret = vkd3d_shader_validate_compile_info(compile_info, true)) < 0) - return ret; - -+ init_scan_signature_info(compile_info); -+ - vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - - switch (compile_info->source_type) -@@ -1321,6 +1433,10 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - ret = compile_d3d_bytecode(compile_info, out, &message_context); - break; - -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = compile_dxbc_dxil(compile_info, out, &message_context); -+ break; -+ - default: - vkd3d_unreachable(); - } -@@ -1339,6 +1455,15 @@ void 
vkd3d_shader_free_scan_descriptor_info(struct vkd3d_shader_scan_descriptor_ - vkd3d_free(scan_descriptor_info->descriptors); - } - -+void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info) -+{ -+ TRACE("info %p.\n", info); -+ -+ vkd3d_shader_free_shader_signature(&info->input); -+ vkd3d_shader_free_shader_signature(&info->output); -+ vkd3d_shader_free_shader_signature(&info->patch_constant); -+} -+ - void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code) - { - TRACE("shader_code %p.\n", shader_code); -@@ -1401,43 +1526,6 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu - desc->version = 0; - } - --static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, -- const struct shader_signature *src) --{ -- unsigned int i; -- -- signature->element_count = src->element_count; -- if (!src->elements) -- { -- assert(!signature->element_count); -- signature->elements = NULL; -- return true; -- } -- -- if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) -- return false; -- -- for (i = 0; i < signature->element_count; ++i) -- { -- struct vkd3d_shader_signature_element *d = &signature->elements[i]; -- struct signature_element *e = &src->elements[i]; -- -- d->semantic_name = e->semantic_name; -- d->semantic_index = e->semantic_index; -- d->stream_index = e->stream_index; -- d->sysval_semantic = e->sysval_semantic; -- d->component_type = e->component_type; -- d->register_index = e->register_index; -- if (e->register_count > 1) -- FIXME("Arrayed elements are not supported yet.\n"); -- d->mask = e->mask; -- d->used_mask = e->used_mask; -- d->min_precision = e->min_precision; -- } -- -- return true; --} -- - void shader_signature_cleanup(struct shader_signature *signature) - { - vkd3d_free(signature->elements); -@@ -1526,6 +1614,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns 
- VKD3D_SHADER_SOURCE_DXBC_TPF, - VKD3D_SHADER_SOURCE_HLSL, - VKD3D_SHADER_SOURCE_D3D_BYTECODE, -+ VKD3D_SHADER_SOURCE_DXBC_DXIL, - }; - - TRACE("count %p.\n", count); -@@ -1564,6 +1653,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - - switch (source_type) - { -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: - case VKD3D_SHADER_SOURCE_DXBC_TPF: - *count = ARRAY_SIZE(dxbc_tpf_types); - return dxbc_tpf_types; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 406d53a3391..0e93f3a556a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -145,8 +145,29 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE = 7002, - VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE = 7003, - VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, -+ VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, -+ VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, - - VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, -+ -+ VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY = 8000, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE = 8001, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET = 8002, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE = 8003, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE = 8004, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT = 8005, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE = 8006, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB = 8007, -+ VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT = 8008, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL = 8009, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID = 8010, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE = 8011, -+ -+ VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER = 8300, -+ VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE = 8301, -+ VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH = 8302, -+ VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH = 8303, -+ 
VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS = 8304, - }; - - enum vkd3d_shader_opcode -@@ -516,6 +537,7 @@ enum vkd3d_data_type - VKD3D_DATA_DOUBLE, - VKD3D_DATA_CONTINUED, - VKD3D_DATA_UNUSED, -+ VKD3D_DATA_UINT8, - }; - - enum vkd3d_immconst_type -@@ -802,6 +824,7 @@ struct signature_element - struct shader_signature - { - struct signature_element *elements; -+ size_t elements_capacity; - unsigned int element_count; - }; - -@@ -811,9 +834,12 @@ struct vkd3d_shader_desc - { - const uint32_t *byte_code; - size_t byte_code_size; -+ bool is_dxil; - struct shader_signature input_signature; - struct shader_signature output_signature; - struct shader_signature patch_constant_signature; -+ -+ uint32_t temp_count; - }; - - struct vkd3d_shader_register_semantic -@@ -1167,6 +1193,8 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); - int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); -+int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); - - void free_shader_desc(struct vkd3d_shader_desc *desc); - -@@ -1339,6 +1367,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, - } - - #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) -+#define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) - - #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') - #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 32439eec7eb..53cb5d9582c 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -26,6 +26,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, 
uint64_t value, VkF - static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); - static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, - struct d3d12_fence *fence, uint64_t value); -+static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue); - static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); - static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any); - -@@ -3624,7 +3625,7 @@ static HRESULT d3d12_command_list_allocate_transfer_buffer(struct d3d12_command_ - static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_command_list *list, - struct d3d12_resource *dst_resource, unsigned int dst_sub_resource_idx, - const struct vkd3d_format *dst_format, struct d3d12_resource *src_resource, -- unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format) -+ unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format, unsigned int layer_count) - { - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - const D3D12_RESOURCE_DESC *dst_desc = &dst_resource->desc; -@@ -3651,6 +3652,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - buffer_image_copy.bufferImageHeight = 0; - vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, - src_format, src_sub_resource_idx, src_desc->MipLevels); -+ buffer_image_copy.imageSubresource.layerCount = layer_count; - src_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; - buffer_image_copy.imageOffset.x = 0; - buffer_image_copy.imageOffset.y = 0; -@@ -3658,7 +3660,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - vk_extent_3d_from_d3d12_miplevel(&buffer_image_copy.imageExtent, src_desc, src_miplevel_idx); - - buffer_size = src_format->byte_count * buffer_image_copy.imageExtent.width 
* -- buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth; -+ buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth * layer_count; - if (FAILED(hr = d3d12_command_list_allocate_transfer_buffer(list, buffer_size, &transfer_buffer))) - { - ERR("Failed to allocate transfer buffer, hr %#x.\n", hr); -@@ -3684,6 +3686,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - - vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, - dst_format, dst_sub_resource_idx, dst_desc->MipLevels); -+ buffer_image_copy.imageSubresource.layerCount = layer_count; - dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; - - assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == -@@ -3813,7 +3816,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - { - d3d12_command_list_copy_incompatible_texture_region(list, - dst_resource, dst->u.SubresourceIndex, dst_format, -- src_resource, src->u.SubresourceIndex, src_format); -+ src_resource, src->u.SubresourceIndex, src_format, 1); - return; - } - -@@ -3835,6 +3838,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); - struct d3d12_resource *dst_resource, *src_resource; -+ const struct vkd3d_format *dst_format, *src_format; - const struct vkd3d_vk_device_procs *vk_procs; - VkBufferCopy vk_buffer_copy; - VkImageCopy vk_image_copy; -@@ -3867,16 +3871,29 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - else - { - layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc); -+ dst_format = dst_resource->format; -+ src_format = src_resource->format; - - assert(d3d12_resource_is_texture(dst_resource)); - assert(d3d12_resource_is_texture(src_resource)); - assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); - 
assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); - -+ if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) -+ { -+ for (i = 0; i < dst_resource->desc.MipLevels; ++i) -+ { -+ d3d12_command_list_copy_incompatible_texture_region(list, -+ dst_resource, i, dst_format, -+ src_resource, i, src_format, layer_count); -+ } -+ return; -+ } -+ - for (i = 0; i < dst_resource->desc.MipLevels; ++i) - { - vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, -- src_resource->format, dst_resource->format, NULL, 0, 0, 0); -+ src_format, dst_format, NULL, 0, 0, 0); - vk_image_copy.dstSubresource.layerCount = layer_count; - vk_image_copy.srcSubresource.layerCount = layer_count; - VK_CALL(vkCmdCopyImage(list->vk_command_buffer, src_resource->u.vk_image, -@@ -6063,8 +6080,35 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if - return refcount; - } - -+static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) -+{ -+ switch (op->opcode) -+ { -+ case VKD3D_CS_OP_WAIT: -+ d3d12_fence_decref(op->u.wait.fence); -+ break; -+ -+ case VKD3D_CS_OP_SIGNAL: -+ d3d12_fence_decref(op->u.signal.fence); -+ break; -+ -+ case VKD3D_CS_OP_EXECUTE: -+ vkd3d_free(op->u.execute.buffers); -+ break; -+ -+ case VKD3D_CS_OP_UPDATE_MAPPINGS: -+ case VKD3D_CS_OP_COPY_MAPPINGS: -+ break; -+ } -+} -+ - static void d3d12_command_queue_op_array_destroy(struct d3d12_command_queue_op_array *array) - { -+ unsigned int i; -+ -+ for (i = 0; i < array->count; ++i) -+ d3d12_command_queue_destroy_op(&array->ops[i]); -+ - vkd3d_free(array->ops); - } - -@@ -6162,17 +6206,131 @@ static struct vkd3d_cs_op_data *d3d12_command_queue_op_array_require_space(struc - return &array->ops[array->count++]; - } - -+static bool clone_array_parameter(void **dst, const void *src, size_t elem_size, unsigned int count) -+{ -+ void *buffer; -+ -+ *dst = NULL; -+ if (src) -+ { -+ if (!(buffer = vkd3d_calloc(count, elem_size))) 
-+ return false; -+ memcpy(buffer, src, count * elem_size); -+ *dst = buffer; -+ } -+ return true; -+} -+ -+static void update_mappings_cleanup(struct vkd3d_cs_update_mappings *update_mappings) -+{ -+ vkd3d_free(update_mappings->region_start_coordinates); -+ vkd3d_free(update_mappings->region_sizes); -+ vkd3d_free(update_mappings->range_flags); -+ vkd3d_free(update_mappings->heap_range_offsets); -+ vkd3d_free(update_mappings->range_tile_counts); -+} -+ - static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, - ID3D12Resource *resource, UINT region_count, - const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, const D3D12_TILE_REGION_SIZE *region_sizes, - ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, -- UINT *heap_range_offsets, UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) -+ const UINT *heap_range_offsets, const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) - { -- FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, " -+ struct d3d12_resource *resource_impl = unsafe_impl_from_ID3D12Resource(resource); -+ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); -+ struct d3d12_heap *heap_impl = unsafe_impl_from_ID3D12Heap(heap); -+ struct vkd3d_cs_update_mappings update_mappings = {0}; -+ struct vkd3d_cs_op_data *op; -+ -+ TRACE("iface %p, resource %p, region_count %u, region_start_coordinates %p, " - "region_sizes %p, heap %p, range_count %u, range_flags %p, heap_range_offsets %p, " -- "range_tile_counts %p, flags %#x stub!\n", -+ "range_tile_counts %p, flags %#x.\n", - iface, resource, region_count, region_start_coordinates, region_sizes, heap, range_count, - range_flags, heap_range_offsets, range_tile_counts, flags); -+ -+ if (!region_count || !range_count) -+ return; -+ -+ if (!command_queue->supports_sparse_binding) -+ { -+ FIXME("Command queue %p does not support sparse binding.\n", command_queue); -+ 
return; -+ } -+ -+ if (!resource_impl->tiles.subresource_count) -+ { -+ WARN("Resource %p is not a tiled resource.\n", resource_impl); -+ return; -+ } -+ -+ if (region_count > 1 && !region_start_coordinates) -+ { -+ WARN("Region start coordinates must not be NULL when region count is > 1.\n"); -+ return; -+ } -+ -+ if (range_count > 1 && !range_tile_counts) -+ { -+ WARN("Range tile counts must not be NULL when range count is > 1.\n"); -+ return; -+ } -+ -+ update_mappings.resource = resource_impl; -+ update_mappings.heap = heap_impl; -+ if (!clone_array_parameter((void **)&update_mappings.region_start_coordinates, -+ region_start_coordinates, sizeof(*region_start_coordinates), region_count)) -+ { -+ ERR("Failed to allocate region start coordinates.\n"); -+ return; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.region_sizes, -+ region_sizes, sizeof(*region_sizes), region_count)) -+ { -+ ERR("Failed to allocate region sizes.\n"); -+ goto free_clones; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.range_flags, -+ range_flags, sizeof(*range_flags), range_count)) -+ { -+ ERR("Failed to allocate range flags.\n"); -+ goto free_clones; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.heap_range_offsets, -+ heap_range_offsets, sizeof(*heap_range_offsets), range_count)) -+ { -+ ERR("Failed to allocate heap range offsets.\n"); -+ goto free_clones; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.range_tile_counts, -+ range_tile_counts, sizeof(*range_tile_counts), range_count)) -+ { -+ ERR("Failed to allocate range tile counts.\n"); -+ goto free_clones; -+ } -+ update_mappings.region_count = region_count; -+ update_mappings.range_count = range_count; -+ update_mappings.flags = flags; -+ -+ vkd3d_mutex_lock(&command_queue->op_mutex); -+ -+ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) -+ { -+ ERR("Failed to add op.\n"); -+ goto unlock_mutex; -+ } -+ -+ op->opcode = 
VKD3D_CS_OP_UPDATE_MAPPINGS; -+ op->u.update_mappings = update_mappings; -+ -+ d3d12_command_queue_submit_locked(command_queue); -+ -+ vkd3d_mutex_unlock(&command_queue->op_mutex); -+ return; -+ -+unlock_mutex: -+ vkd3d_mutex_unlock(&command_queue->op_mutex); -+free_clones: -+ update_mappings_cleanup(&update_mappings); - } - - static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, -@@ -6183,10 +6341,34 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command - const D3D12_TILE_REGION_SIZE *region_size, - D3D12_TILE_MAPPING_FLAGS flags) - { -- FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, " -- "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x stub!\n", -+ struct d3d12_resource *dst_resource_impl = impl_from_ID3D12Resource(dst_resource); -+ struct d3d12_resource *src_resource_impl = impl_from_ID3D12Resource(src_resource); -+ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); -+ struct vkd3d_cs_op_data *op; -+ -+ TRACE("iface %p, dst_resource %p, dst_region_start_coordinate %p, " -+ "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x.\n", - iface, dst_resource, dst_region_start_coordinate, src_resource, - src_region_start_coordinate, region_size, flags); -+ -+ vkd3d_mutex_lock(&command_queue->op_mutex); -+ -+ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) -+ { -+ ERR("Failed to add op.\n"); -+ return; -+ } -+ op->opcode = VKD3D_CS_OP_COPY_MAPPINGS; -+ op->u.copy_mappings.dst_resource = dst_resource_impl; -+ op->u.copy_mappings.src_resource = src_resource_impl; -+ op->u.copy_mappings.dst_region_start_coordinate = *dst_region_start_coordinate; -+ op->u.copy_mappings.src_region_start_coordinate = *src_region_start_coordinate; -+ op->u.copy_mappings.region_size = *region_size; -+ op->u.copy_mappings.flags = flags; -+ -+ d3d12_command_queue_submit_locked(command_queue); -+ -+ 
vkd3d_mutex_unlock(&command_queue->op_mutex); - } - - static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, -@@ -6214,8 +6396,6 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu - ERR("Failed to submit queue(s), vr %d.\n", vr); - - vkd3d_queue_release(vkd3d_queue); -- -- vkd3d_free(buffers); - } - - static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue) -@@ -6273,7 +6453,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm - if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) - { - ERR("Failed to add op.\n"); -- return; -+ goto done; - } - op->opcode = VKD3D_CS_OP_EXECUTE; - op->u.execute.buffers = buffers; -@@ -6281,6 +6461,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm - - d3d12_command_queue_submit_locked(command_queue); - -+done: - vkd3d_mutex_unlock(&command_queue->op_mutex); - return; - } -@@ -6348,6 +6529,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * - - if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) - { -+ ERR("Failed to add op.\n"); - hr = E_OUTOFMEMORY; - goto done; - } -@@ -6686,6 +6868,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if - - if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) - { -+ ERR("Failed to add op.\n"); - hr = E_OUTOFMEMORY; - goto done; - } -@@ -6922,22 +7105,31 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * - return d3d12_command_queue_fixup_after_flush_locked(queue); - } - d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); -- d3d12_fence_decref(fence); - break; - - case VKD3D_CS_OP_SIGNAL: - d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); -- d3d12_fence_decref(op->u.signal.fence); - break; - - case VKD3D_CS_OP_EXECUTE: - 
d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); - break; - -+ case VKD3D_CS_OP_UPDATE_MAPPINGS: -+ FIXME("Tiled resource binding is not supported yet.\n"); -+ update_mappings_cleanup(&op->u.update_mappings); -+ break; -+ -+ case VKD3D_CS_OP_COPY_MAPPINGS: -+ FIXME("Tiled resource mapping copying is not supported yet.\n"); -+ break; -+ - default: - vkd3d_unreachable(); - } - -+ d3d12_command_queue_destroy_op(op); -+ - *flushed_any |= true; - } - -@@ -7000,6 +7192,8 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, - if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) - goto fail_destroy_op_mutex; - -+ queue->supports_sparse_binding = !!(queue->vkd3d_queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT); -+ - d3d12_device_add_ref(queue->device = device); - - return S_OK; -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 4263dcf4184..b9a8943cc08 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -1464,6 +1464,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - - vulkan_info->device_limits = physical_device_info->properties2.properties.limits; - vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; -+ vulkan_info->sparse_binding = features->sparseBinding; -+ vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; - vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; - vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; - vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index ea7b6859cc1..4c07d326504 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c 
-@@ -779,6 +779,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, - VkImageFormatListCreateInfoKHR format_list; - const struct vkd3d_format *format; - VkImageCreateInfo image_info; -+ uint32_t count; - VkResult vr; - - if (resource) -@@ -914,6 +915,20 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, - if (resource && image_info.tiling == VK_IMAGE_TILING_LINEAR) - resource->flags |= VKD3D_RESOURCE_LINEAR_TILING; - -+ if (sparse_resource) -+ { -+ count = 0; -+ VK_CALL(vkGetPhysicalDeviceSparseImageFormatProperties(device->vk_physical_device, image_info.format, -+ image_info.imageType, image_info.samples, image_info.usage, image_info.tiling, &count, NULL)); -+ -+ if (!count) -+ { -+ FIXME("Sparse images are not supported with format %u, type %u, samples %u, usage %#x.\n", -+ image_info.format, image_info.imageType, image_info.samples, image_info.usage); -+ return E_INVALIDARG; -+ } -+ } -+ - if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, vk_image))) < 0) - WARN("Failed to create Vulkan image, vr %d.\n", vr); - -@@ -928,6 +943,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - D3D12_RESOURCE_DESC validated_desc; - VkMemoryRequirements requirements; - VkImage vk_image; -+ bool tiled; - HRESULT hr; - - assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); -@@ -940,8 +956,10 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - desc = &validated_desc; - } - -+ tiled = desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; -+ - /* XXX: We have to create an image to get its memory requirements. */ -- if (SUCCEEDED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image))) -+ if (SUCCEEDED(hr = vkd3d_create_image(device, tiled ? 
NULL : &heap_properties, 0, desc, NULL, &vk_image))) - { - VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements)); - VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL)); -@@ -1039,12 +1057,12 @@ static void d3d12_resource_get_level_box(const struct d3d12_resource *resource, - box->back = d3d12_resource_desc_get_depth(&resource->desc, level); - } - --/* ID3D12Resource */ --static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) -+static void d3d12_resource_init_tiles(struct d3d12_resource *resource) - { -- return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); -+ resource->tiles.subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); - } - -+/* ID3D12Resource */ - static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource *iface, - REFIID riid, void **object) - { -@@ -1661,6 +1679,21 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d - return E_INVALIDARG; - } - -+ if (desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE) -+ { -+ if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D && !device->vk_info.sparse_residency_3d) -+ { -+ WARN("The device does not support tiled 3D images.\n"); -+ return E_INVALIDARG; -+ } -+ if (format->plane_count > 1) -+ { -+ WARN("Invalid format %#x. 
D3D12 does not support multiplanar formats for tiled resources.\n", -+ format->dxgi_format); -+ return E_INVALIDARG; -+ } -+ } -+ - if (!d3d12_resource_validate_texture_format(desc, format) - || !d3d12_resource_validate_texture_alignment(desc, format)) - return E_INVALIDARG; -@@ -1722,6 +1755,12 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 - - resource->desc = *desc; - -+ if (!heap_properties && !device->vk_info.sparse_binding) -+ { -+ WARN("The device does not support tiled images.\n"); -+ return E_INVALIDARG; -+ } -+ - if (heap_properties && !d3d12_resource_validate_heap_properties(resource, heap_properties, initial_state)) - return E_INVALIDARG; - -@@ -1787,6 +1826,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 - resource->heap = NULL; - resource->heap_offset = 0; - -+ memset(&resource->tiles, 0, sizeof(resource->tiles)); -+ - if (FAILED(hr = vkd3d_private_store_init(&resource->private_store))) - { - d3d12_resource_destroy(resource, device); -@@ -1972,6 +2013,8 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, - desc, initial_state, optimized_clear_value, &object))) - return hr; - -+ d3d12_resource_init_tiles(object); -+ - TRACE("Created reserved resource %p.\n", object); - - *resource = object; -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 5e46b467252..2d8138245d8 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -20,6 +20,7 @@ - - #include "vkd3d_private.h" - #include "vkd3d_shaders.h" -+#include "vkd3d_shader_utils.h" - - /* ID3D12RootSignature */ - static inline struct d3d12_root_signature *impl_from_ID3D12RootSignature(ID3D12RootSignature *iface) -@@ -1978,14 +1979,14 @@ static HRESULT create_shader_stage(struct d3d12_device *device, - compile_info.next = shader_interface; - compile_info.source.code = code->pShaderBytecode; - compile_info.source.size = code->BytecodeLength; -- 
compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; - compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; - compile_info.options = options; - compile_info.option_count = ARRAY_SIZE(options); - compile_info.log_level = VKD3D_SHADER_LOG_NONE; - compile_info.source_name = NULL; - -- if ((ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) -+ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0 -+ || (ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) - { - WARN("Failed to compile shader, vkd3d result %d.\n", ret); - return hresult_from_vkd3d_result(ret); -@@ -2008,6 +2009,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER - struct vkd3d_shader_scan_descriptor_info *descriptor_info) - { - struct vkd3d_shader_compile_info compile_info; -+ enum vkd3d_result ret; - - const struct vkd3d_shader_compile_option options[] = - { -@@ -2019,13 +2021,15 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER - compile_info.next = descriptor_info; - compile_info.source.code = code->pShaderBytecode; - compile_info.source.size = code->BytecodeLength; -- compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; - compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; - compile_info.options = options; - compile_info.option_count = ARRAY_SIZE(options); - compile_info.log_level = VKD3D_SHADER_LOG_NONE; - compile_info.source_name = NULL; - -+ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0) -+ return ret; -+ - return vkd3d_shader_scan(&compile_info, NULL); - } - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index b0150754434..c5259420acf 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -147,9 +147,12 @@ struct vkd3d_vulkan_info - unsigned int max_vertex_attrib_divisor; - - 
VkPhysicalDeviceLimits device_limits; -- VkPhysicalDeviceSparseProperties sparse_properties; - struct vkd3d_device_descriptor_limits descriptor_limits; - -+ VkPhysicalDeviceSparseProperties sparse_properties; -+ bool sparse_binding; -+ bool sparse_residency_3d; -+ - VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; - - unsigned int shader_extension_count; -@@ -670,6 +673,11 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); - #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 - #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010 - -+struct d3d12_resource_tile_info -+{ -+ unsigned int subresource_count; -+}; -+ - /* ID3D12Resource */ - struct d3d12_resource - { -@@ -698,9 +706,16 @@ struct d3d12_resource - - struct d3d12_device *device; - -+ struct d3d12_resource_tile_info tiles; -+ - struct vkd3d_private_store private_store; - }; - -+static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) -+{ -+ return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); -+} -+ - static inline bool d3d12_resource_is_buffer(const struct d3d12_resource *resource) - { - return resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; -@@ -1454,6 +1469,8 @@ enum vkd3d_cs_op - VKD3D_CS_OP_WAIT, - VKD3D_CS_OP_SIGNAL, - VKD3D_CS_OP_EXECUTE, -+ VKD3D_CS_OP_UPDATE_MAPPINGS, -+ VKD3D_CS_OP_COPY_MAPPINGS, - }; - - struct vkd3d_cs_wait -@@ -1474,6 +1491,30 @@ struct vkd3d_cs_execute - unsigned int buffer_count; - }; - -+struct vkd3d_cs_update_mappings -+{ -+ struct d3d12_resource *resource; -+ struct d3d12_heap *heap; -+ D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates; -+ D3D12_TILE_REGION_SIZE *region_sizes; -+ D3D12_TILE_RANGE_FLAGS *range_flags; -+ UINT *heap_range_offsets; -+ UINT *range_tile_counts; -+ UINT region_count; -+ UINT range_count; -+ D3D12_TILE_MAPPING_FLAGS flags; -+}; -+ -+struct vkd3d_cs_copy_mappings -+{ -+ struct d3d12_resource *dst_resource; -+ struct d3d12_resource 
*src_resource; -+ D3D12_TILED_RESOURCE_COORDINATE dst_region_start_coordinate; -+ D3D12_TILED_RESOURCE_COORDINATE src_region_start_coordinate; -+ D3D12_TILE_REGION_SIZE region_size; -+ D3D12_TILE_MAPPING_FLAGS flags; -+}; -+ - struct vkd3d_cs_op_data - { - enum vkd3d_cs_op opcode; -@@ -1482,6 +1523,8 @@ struct vkd3d_cs_op_data - struct vkd3d_cs_wait wait; - struct vkd3d_cs_signal signal; - struct vkd3d_cs_execute execute; -+ struct vkd3d_cs_update_mappings update_mappings; -+ struct vkd3d_cs_copy_mappings copy_mappings; - } u; - }; - -@@ -1519,6 +1562,8 @@ struct d3d12_command_queue - * set, aux_op_queue.count must be zero. */ - struct d3d12_command_queue_op_array aux_op_queue; - -+ bool supports_sparse_binding; -+ - struct vkd3d_private_store private_store; - }; - --- -2.40.1 - diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-4f2e07a45d0cdb82b1cbba0cfe95c87a697.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-4f2e07a45d0cdb82b1cbba0cfe95c87a697.patch new file mode 100644 index 00000000..e7a78b5a --- /dev/null +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-4f2e07a45d0cdb82b1cbba0cfe95c87a697.patch @@ -0,0 +1,16536 @@ +From 07a8a6254b68c41da1dbdd64c15d5c40868f44d4 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Wed, 28 Jun 2023 16:27:03 +1000 +Subject: [PATCH] Updated vkd3d to 4f2e07a45d0cdb82b1cbba0cfe95c87a69799865. 
+ +Wine-staging 8.14 +--- + include/d3d12.idl | 70 +- + libs/vkd3d/Makefile.in | 1 + + libs/vkd3d/include/private/vkd3d_common.h | 22 + + .../include/private/vkd3d_shader_utils.h | 63 + + libs/vkd3d/include/vkd3d.h | 35 + + libs/vkd3d/include/vkd3d_shader.h | 282 +- + libs/vkd3d/libs/vkd3d-common/debug.c | 17 +- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 48 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 415 ++- + libs/vkd3d/libs/vkd3d-shader/dxbc.c | 17 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 2370 +++++++++++++++++ + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 211 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 45 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1600 ++++++----- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 638 +++-- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 525 +++- + libs/vkd3d/libs/vkd3d-shader/ir.c | 230 +- + libs/vkd3d/libs/vkd3d-shader/preproc.l | 140 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 222 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 1216 +++++---- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 388 ++- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 78 +- + libs/vkd3d/libs/vkd3d/command.c | 673 +++-- + libs/vkd3d/libs/vkd3d/device.c | 22 +- + libs/vkd3d/libs/vkd3d/resource.c | 283 +- + libs/vkd3d/libs/vkd3d/state.c | 10 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 102 +- + 27 files changed, 7768 insertions(+), 1955 deletions(-) + create mode 100644 libs/vkd3d/include/private/vkd3d_shader_utils.h + create mode 100644 libs/vkd3d/libs/vkd3d-shader/dxil.c + +diff --git a/include/d3d12.idl b/include/d3d12.idl +index 4fec32d2656..5811608b94f 100644 +--- a/include/d3d12.idl ++++ b/include/d3d12.idl +@@ -44,6 +44,7 @@ const UINT D3D12_DEFAULT_STENCIL_WRITE_MASK = 0xff; + const UINT D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND = 0xffffffff; + cpp_quote("#define D3D12_FLOAT32_MAX (3.402823466e+38f)") + const UINT D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT = 32; ++const UINT D3D12_PACKED_TILE = 0xffffffff; + const UINT D3D12_UAV_SLOT_COUNT = 64; + const UINT 
D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; + const UINT D3D12_REQ_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; +@@ -72,6 +73,7 @@ const UINT D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT = 4096; + const UINT D3D12_STANDARD_MAXIMUM_ELEMENT_ALIGNMENT_BYTE_MULTIPLE = 4; + const UINT D3D12_TEXTURE_DATA_PITCH_ALIGNMENT = 256; + const UINT D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT = 512; ++const UINT D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES = 65536; + const UINT D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT = 4096; + const UINT D3D12_VS_INPUT_REGISTER_COUNT = 32; + const UINT D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE = 16; +@@ -1853,6 +1855,24 @@ typedef struct D3D12_WRITEBUFFERIMMEDIATE_PARAMETER + UINT32 Value; + } D3D12_WRITEBUFFERIMMEDIATE_PARAMETER; + ++typedef enum D3D12_PROTECTED_RESOURCE_SESSION_FLAGS ++{ ++ D3D12_PROTECTED_RESOURCE_SESSION_FLAG_NONE = 0, ++} D3D12_PROTECTED_RESOURCE_SESSION_FLAGS; ++cpp_quote("DEFINE_ENUM_FLAG_OPERATORS(D3D12_PROTECTED_RESOURCE_SESSION_FLAGS);") ++ ++typedef enum D3D12_PROTECTED_SESSION_STATUS ++{ ++ D3D12_PROTECTED_SESSION_STATUS_OK = 0, ++ D3D12_PROTECTED_SESSION_STATUS_INVALID = 1, ++} D3D12_PROTECTED_SESSION_STATUS; ++ ++typedef struct D3D12_PROTECTED_RESOURCE_SESSION_DESC ++{ ++ UINT NodeMask; ++ D3D12_PROTECTED_RESOURCE_SESSION_FLAGS Flags; ++} D3D12_PROTECTED_RESOURCE_SESSION_DESC; ++ + [ + uuid(c4fec28f-7966-4e95-9f94-f431cb56c3b8), + object, +@@ -2214,6 +2234,41 @@ interface ID3D12GraphicsCommandList2 : ID3D12GraphicsCommandList1 + const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes); + } + ++[ ++ uuid(a1533d18-0ac1-4084-85b9-89a96116806b), ++ object, ++ local, ++ pointer_default(unique) ++] ++interface ID3D12ProtectedSession : ID3D12DeviceChild ++{ ++ HRESULT GetStatusFence(REFIID riid, void **fence); ++ ++ D3D12_PROTECTED_SESSION_STATUS GetSessionStatus(); ++} ++ ++[ ++ uuid(6cd696f4-f289-40cc-8091-5a6c0a099c3d), ++ object, ++ local, ++ pointer_default(unique) ++] ++interface ID3D12ProtectedResourceSession : 
ID3D12ProtectedSession ++{ ++ D3D12_PROTECTED_RESOURCE_SESSION_DESC GetDesc(); ++} ++ ++[ ++ uuid(6fda83a7-b84c-4e38-9ac8-c7bd22016b3d), ++ object, ++ local, ++ pointer_default(unique) ++] ++interface ID3D12GraphicsCommandList3 : ID3D12GraphicsCommandList2 ++{ ++ void SetProtectedResourceSession(ID3D12ProtectedResourceSession *protected_resource_session); ++} ++ + typedef enum D3D12_TILE_RANGE_FLAGS + { + D3D12_TILE_RANGE_FLAG_NONE = 0x0, +@@ -2243,8 +2298,8 @@ interface ID3D12CommandQueue : ID3D12Pageable + ID3D12Heap *heap, + UINT range_count, + const D3D12_TILE_RANGE_FLAGS *range_flags, +- UINT *heap_range_offsets, +- UINT *range_tile_counts, ++ const UINT *heap_range_offsets, ++ const UINT *range_tile_counts, + D3D12_TILE_MAPPING_FLAGS flags); + + void CopyTileMappings(ID3D12Resource *dst_resource, +@@ -2378,6 +2433,17 @@ interface ID3D12Fence : ID3D12Pageable + HRESULT Signal(UINT64 value); + } + ++[ ++ uuid(433685fe-e22b-4ca0-a8db-b5b4f4dd0e4a), ++ object, ++ local, ++ pointer_default(unique) ++] ++interface ID3D12Fence1 : ID3D12Fence ++{ ++ D3D12_FENCE_FLAGS GetCreationFlags(); ++} ++ + [ + uuid(6102dee4-af59-4b09-b999-b44d73f09b24), + object, +diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in +index 1ba0e9f71e1..f647af11d07 100644 +--- a/libs/vkd3d/Makefile.in ++++ b/libs/vkd3d/Makefile.in +@@ -17,6 +17,7 @@ SOURCES = \ + libs/vkd3d-shader/d3d_asm.c \ + libs/vkd3d-shader/d3dbc.c \ + libs/vkd3d-shader/dxbc.c \ ++ libs/vkd3d-shader/dxil.c \ + libs/vkd3d-shader/glsl.c \ + libs/vkd3d-shader/hlsl.c \ + libs/vkd3d-shader/hlsl.l \ +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index 1ac23b4a085..f7d98f327f1 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -20,6 +20,7 @@ + #define __VKD3D_COMMON_H + + #include "config.h" ++#define WIN32_LEAN_AND_MEAN + #include "windows.h" + #include "vkd3d_types.h" + +@@ -28,6 +29,7 @@ + #include + #include 
+ #include ++#include + + #ifdef _MSC_VER + #include +@@ -171,6 +173,11 @@ static inline bool vkd3d_bound_range(size_t start, size_t count, size_t limit) + #endif + } + ++static inline bool vkd3d_object_range_overflow(size_t start, size_t count, size_t size) ++{ ++ return (~(size_t)0 - start) / size < count; ++} ++ + static inline uint16_t vkd3d_make_u16(uint8_t low, uint8_t high) + { + return low | ((uint16_t)high << 8); +@@ -186,6 +193,21 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) + return (x > y) - (x < y); + } + ++static inline bool bitmap_clear(uint32_t *map, unsigned int idx) ++{ ++ return map[idx >> 5] &= ~(1u << (idx & 0x1f)); ++} ++ ++static inline bool bitmap_set(uint32_t *map, unsigned int idx) ++{ ++ return map[idx >> 5] |= (1u << (idx & 0x1f)); ++} ++ ++static inline bool bitmap_is_set(const uint32_t *map, unsigned int idx) ++{ ++ return map[idx >> 5] & (1u << (idx & 0x1f)); ++} ++ + static inline int ascii_isupper(int c) + { + return 'A' <= c && c <= 'Z'; +diff --git a/libs/vkd3d/include/private/vkd3d_shader_utils.h b/libs/vkd3d/include/private/vkd3d_shader_utils.h +new file mode 100644 +index 00000000000..00052a89988 +--- /dev/null ++++ b/libs/vkd3d/include/private/vkd3d_shader_utils.h +@@ -0,0 +1,63 @@ ++/* ++ * Copyright 2023 Conor McCarthy for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __VKD3D_SHADER_UTILS_H ++#define __VKD3D_SHADER_UTILS_H ++ ++#include "vkd3d_shader.h" ++ ++#define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') ++#define TAG_SHDR VKD3D_MAKE_TAG('S', 'H', 'D', 'R') ++#define TAG_SHEX VKD3D_MAKE_TAG('S', 'H', 'E', 'X') ++ ++static inline enum vkd3d_result vkd3d_shader_parse_dxbc_source_type(const struct vkd3d_shader_code *dxbc, ++ enum vkd3d_shader_source_type *type, char **messages) ++{ ++ struct vkd3d_shader_dxbc_desc desc; ++ enum vkd3d_result ret; ++ unsigned int i; ++ ++ *type = VKD3D_SHADER_SOURCE_NONE; ++ ++ if ((ret = vkd3d_shader_parse_dxbc(dxbc, 0, &desc, messages)) < 0) ++ return ret; ++ ++ for (i = 0; i < desc.section_count; ++i) ++ { ++ uint32_t tag = desc.sections[i].tag; ++ if (tag == TAG_SHDR || tag == TAG_SHEX) ++ { ++ *type = VKD3D_SHADER_SOURCE_DXBC_TPF; ++ } ++ else if (tag == TAG_DXIL) ++ { ++ *type = VKD3D_SHADER_SOURCE_DXBC_DXIL; ++ /* Default to DXIL if both are present. */ ++ break; ++ } ++ } ++ ++ vkd3d_shader_free_dxbc(&desc); ++ ++ if (*type == VKD3D_SHADER_SOURCE_NONE) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ return VKD3D_OK; ++} ++ ++#endif /* __VKD3D_SHADER_UTILS_H */ +diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h +index 72ed3ced671..2ccda47248a 100644 +--- a/libs/vkd3d/include/vkd3d.h ++++ b/libs/vkd3d/include/vkd3d.h +@@ -207,7 +207,42 @@ VKD3D_API VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device); + VKD3D_API struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device); + + VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); ++ ++/** ++ * Acquire the Vulkan queue backing a command queue. 
++ * ++ * While a queue is acquired by the client, it is locked so that ++ * neither the vkd3d library nor other threads can submit work to ++ * it. For that reason it should be released as soon as possible with ++ * vkd3d_release_vk_queue(). The lock is not reentrant, so the same ++ * queue must not be acquired more than once by the same thread. ++ * ++ * Work submitted through the Direct3D 12 API exposed by vkd3d is not ++ * always immediately submitted to the Vulkan queue; sometimes it is ++ * kept in another internal queue, which might not necessarily be ++ * empty at the time vkd3d_acquire_vk_queue() is called. For this ++ * reason, work submitted directly to the Vulkan queue might appear to ++ * the Vulkan driver as being submitted before other work submitted ++ * through the Direct3D 12 API. If this is not desired, it is ++ * recommended to synchronize work submission using an ID3D12Fence ++ * object, by submitting to the queue a signal operation after all the ++ * Direct3D 12 work is submitted and waiting for it before calling ++ * vkd3d_acquire_vk_queue(). ++ * ++ * \since 1.0 ++ */ + VKD3D_API VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue); ++ ++/** ++ * Release the Vulkan queue backing a command queue. ++ * ++ * This must be paired to an earlier corresponding ++ * vkd3d_acquire_vk_queue(). After this function is called, the Vulkan ++ * queue returned by vkd3d_acquire_vk_queue() must not be used any ++ * more. 
++ * ++ * \since 1.0 ++ */ + VKD3D_API void vkd3d_release_vk_queue(ID3D12CommandQueue *queue); + + VKD3D_API HRESULT vkd3d_create_image_resource(ID3D12Device *device, +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index 274241546ea..e98aad4fe95 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -85,6 +85,16 @@ enum vkd3d_shader_structure_type + * \since 1.3 + */ + VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO, ++ /** ++ * The structure is a vkd3d_shader_scan_signature_info structure. ++ * \since 1.9 ++ */ ++ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO, ++ /** ++ * The structure is a vkd3d_shader_next_stage_info structure. ++ * \since 1.9 ++ */ ++ VKD3D_SHADER_STRUCTURE_TYPE_NEXT_STAGE_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), + }; +@@ -134,6 +144,14 @@ enum vkd3d_shader_compile_option_formatting_flags + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), + }; + ++enum vkd3d_shader_compile_option_pack_matrix_order ++{ ++ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR = 0x00000001, ++ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR = 0x00000002, ++ ++ VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER), ++}; ++ + enum vkd3d_shader_compile_option_name + { + /** +@@ -164,6 +182,15 @@ enum vkd3d_shader_compile_option_name + * \since 1.7 + */ + VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE = 0x00000006, ++ /** ++ * This option specifies default matrix packing order. It's only supported for HLSL source type. ++ * Explicit variable modifiers or pragmas will take precedence. ++ * ++ * \a value is a member of enum vkd3d_shader_compile_option_pack_matrix_order. 
++ * ++ * \since 1.9 ++ */ ++ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER = 0x00000007, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), + }; +@@ -327,6 +354,25 @@ struct vkd3d_shader_parameter + } u; + }; + ++/** ++ * Symbolic register indices for mapping uniform constant register sets in ++ * legacy Direct3D bytecode to constant buffer views in the target environment. ++ * ++ * Members of this enumeration are used in ++ * \ref vkd3d_shader_resource_binding.register_index. ++ * ++ * \since 1.9 ++ */ ++enum vkd3d_shader_d3dbc_constant_register ++{ ++ /** The float constant register set, c# in Direct3D assembly. */ ++ VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER = 0x0, ++ /** The integer constant register set, i# in Direct3D assembly. */ ++ VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER = 0x1, ++ /** The boolean constant register set, b# in Direct3D assembly. */ ++ VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER = 0x2, ++}; ++ + /** + * Describes the mapping of a single resource or resource array to its binding + * point in the target environment. +@@ -351,7 +397,14 @@ struct vkd3d_shader_resource_binding + * support multiple register spaces, this parameter must be set to 0. + */ + unsigned int register_space; +- /** Register index of the DXBC resource. */ ++ /** ++ * Register index of the Direct3D resource. ++ * ++ * For legacy Direct3D shaders, vkd3d-shader maps each constant register ++ * set to a single constant buffer view. This parameter names the register ++ * set to map, and must be a member of ++ * enum vkd3d_shader_d3dbc_constant_register. ++ */ + unsigned int register_index; + /** Shader stage(s) to which the resource is visible. */ + enum vkd3d_shader_visibility shader_visibility; +@@ -611,6 +664,11 @@ enum vkd3d_shader_source_type + * model 1, 2, and 3 shaders. \since 1.3 + */ + VKD3D_SHADER_SOURCE_D3D_BYTECODE, ++ /** ++ * A 'DirectX Intermediate Language' shader embedded in a DXBC container. 
This is ++ * the format used for Direct3D shader model 6 shaders. \since 1.9 ++ */ ++ VKD3D_SHADER_SOURCE_DXBC_DXIL, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), + }; +@@ -620,7 +678,7 @@ enum vkd3d_shader_target_type + { + /** + * The shader has no type or is to be ignored. This is not a valid value +- * for vkd3d_shader_compile() or vkd3d_shader_scan(). ++ * for vkd3d_shader_compile(). + */ + VKD3D_SHADER_TARGET_NONE, + /** +@@ -1320,6 +1378,20 @@ struct vkd3d_shader_descriptor_info + * A chained structure enumerating the descriptors declared by a shader. + * + * This structure extends vkd3d_shader_compile_info. ++ * ++ * When scanning a legacy Direct3D shader, vkd3d-shader enumerates each ++ * constant register set used by the shader as a single constant buffer ++ * descriptor, as follows: ++ * - The \ref vkd3d_shader_descriptor_info.type field is set to ++ * VKD3D_SHADER_DESCRIPTOR_TYPE_CBV. ++ * - The \ref vkd3d_shader_descriptor_info.register_space field is set to zero. ++ * - The \ref vkd3d_shader_descriptor_info.register_index field is set to a ++ * member of enum vkd3d_shader_d3dbc_constant_register denoting which set ++ * is used. ++ * - The \ref vkd3d_shader_descriptor_info.count field is set to one. ++ * ++ * In summary, there may be up to three such descriptors, one for each register ++ * set used by the shader: float, integer, and boolean. + */ + struct vkd3d_shader_scan_descriptor_info + { +@@ -1551,6 +1623,134 @@ static inline uint32_t vkd3d_shader_create_swizzle(enum vkd3d_shader_swizzle_com + | ((w & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(3)); + } + ++/** ++ * A chained structure containing descriptions of shader inputs and outputs. ++ * ++ * This structure is currently implemented only for DXBC and legacy D3D bytecode ++ * source types. ++ * For DXBC shaders, the returned information is parsed directly from the ++ * signatures embedded in the DXBC shader. 
++ * For legacy D3D shaders, the returned information is synthesized based on ++ * registers declared or used by shader instructions. ++ * For all other shader types, the structure is zeroed. ++ * ++ * All members (except for \ref type and \ref next) are output-only. ++ * ++ * This structure is passed to vkd3d_shader_scan() and extends ++ * vkd3d_shader_compile_info. ++ * ++ * Members of this structure are allocated by vkd3d-shader and should be freed ++ * with vkd3d_shader_free_scan_signature_info() when no longer needed. ++ * ++ * All signatures may contain pointers into the input shader, and should only ++ * be accessed while the input shader remains valid. ++ * ++ * Signature elements are synthesized from legacy Direct3D bytecode as follows: ++ * - The \ref vkd3d_shader_signature_element.semantic_name field is set to an ++ * uppercase string corresponding to the HLSL name for the usage, e.g. ++ * "POSITION", "BLENDWEIGHT", "COLOR", "PSIZE", etc. ++ * - The \ref vkd3d_shader_signature_element.semantic_index field is set to the ++ * usage index. ++ * - The \ref vkd3d_shader_signature_element.stream_index is always 0. ++ * ++ * Signature elements are synthesized for any input or output register declared ++ * or used in a legacy Direct3D bytecode shader, including the following: ++ * - Shader model 1 and 2 colour and texture coordinate registers. ++ * - The shader model 1 pixel shader output register. ++ * - Shader model 1 and 2 vertex shader output registers (position, fog, and ++ * point size). ++ * - Shader model 3 pixel shader system value input registers (pixel position ++ * and face). ++ * ++ * \since 1.9 ++ */ ++struct vkd3d_shader_scan_signature_info ++{ ++ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO. */ ++ enum vkd3d_shader_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ ++ const void *next; ++ ++ /** The shader input varyings. 
*/ ++ struct vkd3d_shader_signature input; ++ ++ /** The shader output varyings. */ ++ struct vkd3d_shader_signature output; ++ ++ /** The shader patch constant varyings. */ ++ struct vkd3d_shader_signature patch_constant; ++}; ++ ++/** ++ * Describes the mapping of an output varying register in a shader stage, ++ * to an input varying register in the following shader stage. ++ * ++ * This structure is used in struct vkd3d_shader_next_stage_info. ++ */ ++struct vkd3d_shader_varying_map ++{ ++ /** ++ * The signature index (in the output signature) of the output varying. ++ * If greater than or equal to the number of elements in the output ++ * signature, signifies that the varying is consumed by the next stage but ++ * not written by this one. ++ */ ++ unsigned int output_signature_index; ++ /** The register index of the input varying to map this register to. */ ++ unsigned int input_register_index; ++ /** The mask consumed by the destination register. */ ++ unsigned int input_mask; ++}; ++ ++/** ++ * A chained structure which describes the next shader in the pipeline. ++ * ++ * This structure is optional, and should only be provided if there is in fact ++ * another shader in the pipeline. ++ * However, depending on the input and output formats, this structure may be ++ * necessary in order to generate shaders which correctly match each other. ++ * If the structure or its individual fields are not provided, vkd3d-shader ++ * will generate shaders which may be correct in isolation, but are not ++ * guaranteed to correctly match each other. ++ * ++ * This structure is passed to vkd3d_shader_compile() and extends ++ * vkd3d_shader_compile_info. ++ * ++ * This structure contains only input parameters. ++ * ++ * \since 1.9 ++ */ ++struct vkd3d_shader_next_stage_info ++{ ++ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_NEXT_STAGE_INFO. */ ++ enum vkd3d_shader_structure_type type; ++ /** Optional pointer to a structure containing further parameters. 
*/ ++ const void *next; ++ ++ /** ++ * A mapping of output varyings in this shader stage to input varyings ++ * in the next shader stage. ++ * ++ * This mapping should include exactly one element for each varying ++ * consumed by the next shader stage. ++ * If this shader stage outputs a varying that is not consumed by the next ++ * shader stage, that varying should be absent from this array. ++ * ++ * If this field is absent, vkd3d-shader will map varyings from one stage ++ * to another based on their register index. ++ * For Direct3D shader model 3.0, such a default mapping will be incorrect ++ * unless the registers are allocated in the same order, and hence this ++ * field is necessary to correctly match inter-stage varyings. ++ * This mapping may also be necessary under other circumstances where the ++ * varying interface does not match exactly. ++ * ++ * This mapping may be constructed by vkd3d_shader_build_varying_map(). ++ */ ++ const struct vkd3d_shader_varying_map *varying_map; ++ /** The number of registers provided in \ref varying_map. */ ++ unsigned int varying_count; ++}; ++ + #ifdef LIBVKD3D_SHADER_SOURCE + # define VKD3D_SHADER_API VKD3D_EXPORT + #else +@@ -1623,12 +1823,14 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported + * + * Depending on the source and target types, this function may support the + * following chained structures: ++ * - vkd3d_shader_hlsl_source_info + * - vkd3d_shader_interface_info ++ * - vkd3d_shader_next_stage_info + * - vkd3d_shader_scan_descriptor_info ++ * - vkd3d_shader_scan_signature_info + * - vkd3d_shader_spirv_domain_shader_target_info + * - vkd3d_shader_spirv_target_info + * - vkd3d_shader_transform_feedback_info +- * - vkd3d_shader_hlsl_source_info + * + * \param compile_info A chained structure containing compilation parameters. 
+ * +@@ -1784,6 +1986,26 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver + * Parse shader source code or byte code, returning various types of requested + * information. + * ++ * The \a source_type member of \a compile_info must be set to the type of the ++ * shader. ++ * ++ * The \a target_type member may be set to VKD3D_SHADER_TARGET_NONE, in which ++ * case vkd3d_shader_scan() will return information about the shader in ++ * isolation. Alternatively, it may be set to a valid compilation target for the ++ * shader, in which case vkd3d_shader_scan() will return information that ++ * reflects the interface for a shader as it will be compiled to that target. ++ * In this case other chained structures may be appended to \a compile_info as ++ * they would be passed to vkd3d_shader_compile(), and interpreted accordingly, ++ * such as vkd3d_shader_spirv_target_info. ++ * ++ * (For a hypothetical example, suppose the source shader distinguishes float ++ * and integer texture data, but the target environment does not support integer ++ * textures. In this case vkd3d_shader_compile() might translate integer ++ * operations to float. Accordingly using VKD3D_SHADER_TARGET_NONE would ++ * accurately report whether the texture expects integer or float data, but ++ * using the relevant specific target type would report ++ * VKD3D_SHADER_RESOURCE_DATA_FLOAT.) 
++ * + * Currently this function supports the following code types: + * - VKD3D_SHADER_SOURCE_DXBC_TPF + * +@@ -1791,6 +2013,7 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver + * \n + * The DXBC_TPF scanner supports the following chained structures: + * - vkd3d_shader_scan_descriptor_info ++ * - vkd3d_shader_scan_signature_info + * \n + * Although the \a compile_info parameter is read-only, chained structures + * passed to this function need not be, and may serve as output parameters, +@@ -1827,12 +2050,18 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( + struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info); + + /** +- * Read the input signature of a compiled shader, returning a structural ++ * Read the input signature of a compiled DXBC shader, returning a structural + * description which can be easily parsed by C code. + * + * This function parses a compiled shader. To parse a standalone root signature, + * use vkd3d_shader_parse_root_signature(). + * ++ * This function only parses DXBC shaders, and only retrieves the input ++ * signature. To retrieve signatures from other shader types, or other signature ++ * types, use vkd3d_shader_scan() and struct vkd3d_shader_scan_signature_info. ++ * This function returns the same input signature that is returned in ++ * struct vkd3d_shader_scan_signature_info. ++ * + * \param dxbc Compiled byte code, in DXBC format. + * + * \param signature Output location in which the parsed root signature will be +@@ -2022,6 +2251,48 @@ VKD3D_SHADER_API int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxb + VKD3D_SHADER_API int vkd3d_shader_serialize_dxbc(size_t section_count, + const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); + ++/** ++ * Free members of struct vkd3d_shader_scan_signature_info allocated by ++ * vkd3d_shader_scan(). 
++ * ++ * This function may free members of vkd3d_shader_scan_signature_info, but ++ * does not free the structure itself. ++ * ++ * \param info Scan information to free. ++ * ++ * \since 1.9 ++ */ ++VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info); ++ ++/** ++ * Build a mapping of output varyings in a shader stage to input varyings in ++ * the following shader stage. ++ * ++ * This mapping should be used in struct vkd3d_shader_next_stage_info to ++ * compile the first shader. ++ * ++ * \param output_signature The output signature of the first shader. ++ * ++ * \param input_signature The input signature of the second shader. ++ * ++ * \param count On output, contains the number of entries written into ++ * \ref varyings. ++ * ++ * \param varyings Pointer to an output array of varyings. ++ * This must point to space for N varyings, where N is the number of elements ++ * in the input signature. ++ * ++ * \remark Valid legacy Direct3D pixel shaders have at most 12 varying inputs: ++ * 10 inter-stage varyings, face, and position. ++ * Therefore, in practice, it is safe to call this function with a ++ * pre-allocated array with a fixed size of 12. ++ * ++ * \since 1.9 ++ */ ++VKD3D_SHADER_API void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, ++ const struct vkd3d_shader_signature *input_signature, ++ unsigned int *count, struct vkd3d_shader_varying_map *varyings); ++ + #endif /* VKD3D_SHADER_NO_PROTOTYPES */ + + /** Type of vkd3d_shader_get_version(). */ +@@ -2087,6 +2358,9 @@ typedef int (*PFN_vkd3d_shader_parse_dxbc)(const struct vkd3d_shader_code *dxbc, + typedef int (*PFN_vkd3d_shader_serialize_dxbc)(size_t section_count, + const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); + ++/** Type of vkd3d_shader_free_scan_signature_info(). 
\since 1.9 */ ++typedef void (*PFN_vkd3d_shader_free_scan_signature_info)(struct vkd3d_shader_scan_signature_info *info); ++ + #ifdef __cplusplus + } + #endif /* __cplusplus */ +diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c +index b363efbd360..aa7df5bd764 100644 +--- a/libs/vkd3d/libs/vkd3d-common/debug.c ++++ b/libs/vkd3d/libs/vkd3d-common/debug.c +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + #ifdef HAVE_PTHREAD_H + #include + #endif +@@ -44,11 +45,11 @@ extern const char *const vkd3d_dbg_env_name; + + static const char *const debug_level_names[] = + { +- /* VKD3D_DBG_LEVEL_NONE */ "none", +- /* VKD3D_DBG_LEVEL_ERR */ "err", +- /* VKD3D_DBG_LEVEL_FIXME */ "fixme", +- /* VKD3D_DBG_LEVEL_WARN */ "warn", +- /* VKD3D_DBG_LEVEL_TRACE */ "trace", ++ [VKD3D_DBG_LEVEL_NONE ] = "none", ++ [VKD3D_DBG_LEVEL_ERR ] = "err", ++ [VKD3D_DBG_LEVEL_FIXME] = "fixme", ++ [VKD3D_DBG_LEVEL_WARN ] = "warn", ++ [VKD3D_DBG_LEVEL_TRACE] = "trace", + }; + + enum vkd3d_dbg_level vkd3d_dbg_get_level(void) +@@ -105,7 +106,13 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch + + assert(level < ARRAY_SIZE(debug_level_names)); + ++#ifdef _WIN32 ++ vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); ++#elif HAVE_GETTID ++ vkd3d_dbg_output("vkd3d:%u:%s:%s ", gettid(), debug_level_names[level], function); ++#else + vkd3d_dbg_output("vkd3d:%s:%s ", debug_level_names[level], function); ++#endif + va_start(args, fmt); + vkd3d_dbg_voutput(fmt, args); + va_end(args); +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index 0a821b5c878..d72402eb250 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -578,17 +578,17 @@ static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, e + { + static const char *const resource_type_names[] = + { +- /* 
VKD3D_SHADER_RESOURCE_NONE */ "none", +- /* VKD3D_SHADER_RESOURCE_BUFFER */ "buffer", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ "texture1d", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ "texture2d", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ "texture2dms", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ "texture3d", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ "texturecube", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ "texture1darray", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ "texture2darray", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ "texture2dmsarray", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ "texturecubearray", ++ [VKD3D_SHADER_RESOURCE_NONE ] = "none", ++ [VKD3D_SHADER_RESOURCE_BUFFER ] = "buffer", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_1D ] = "texture1d", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2D ] = "texture2d", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS ] = "texture2dms", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_3D ] = "texture3d", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE ] = "texturecube", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY ] = "texture1darray", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY ] = "texture2darray", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = "texture2dmsarray", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = "texturecubearray", + }; + + if (type < ARRAY_SIZE(resource_type_names)) +@@ -601,19 +601,19 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, const + { + static const char *const data_type_names[] = + { +- /* VKD3D_DATA_FLOAT */ "float", +- /* VKD3D_DATA_INT */ "int", +- /* VKD3D_DATA_RESOURCE */ "resource", +- /* VKD3D_DATA_SAMPLER */ "sampler", +- /* VKD3D_DATA_UAV */ "uav", +- /* VKD3D_DATA_UINT */ "uint", +- /* VKD3D_DATA_UNORM */ "unorm", +- /* VKD3D_DATA_SNORM */ "snorm", +- /* VKD3D_DATA_OPAQUE */ "opaque", +- /* VKD3D_DATA_MIXED */ "mixed", +- /* VKD3D_DATA_DOUBLE */ "double", +- /* VKD3D_DATA_CONTINUED */ "", +- /* VKD3D_DATA_UNUSED */ "", ++ [VKD3D_DATA_FLOAT ] = "float", ++ [VKD3D_DATA_INT ] = "int", ++ 
[VKD3D_DATA_RESOURCE ] = "resource", ++ [VKD3D_DATA_SAMPLER ] = "sampler", ++ [VKD3D_DATA_UAV ] = "uav", ++ [VKD3D_DATA_UINT ] = "uint", ++ [VKD3D_DATA_UNORM ] = "unorm", ++ [VKD3D_DATA_SNORM ] = "snorm", ++ [VKD3D_DATA_OPAQUE ] = "opaque", ++ [VKD3D_DATA_MIXED ] = "mixed", ++ [VKD3D_DATA_DOUBLE ] = "double", ++ [VKD3D_DATA_CONTINUED] = "", ++ [VKD3D_DATA_UNUSED ] = "", + }; + const char *name; + int i; +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 712613ac13b..99a5bd7a438 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -214,6 +214,9 @@ struct vkd3d_shader_sm1_parser + bool abort; + + struct vkd3d_shader_parser p; ++ ++#define MAX_CONSTANT_COUNT 8192 ++ uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32]; + }; + + /* This table is not order or position dependent. */ +@@ -260,9 +263,9 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = + /* Declarations */ + {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, + /* Constant definitions */ +- {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, ++ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, +- {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, ++ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, + /* Control flow */ + {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, +@@ -327,9 +330,9 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = + /* Declarations */ + {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, + /* Constant definitions */ +- {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, ++ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, +- {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, ++ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, + /* Control flow */ + {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, +@@ -490,6 +493,309 @@ 
static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader + dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; + } + ++static struct signature_element *find_signature_element(const struct shader_signature *signature, ++ const char *semantic_name, unsigned int semantic_index) ++{ ++ struct signature_element *e = signature->elements; ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ if (!ascii_strcasecmp(e[i].semantic_name, semantic_name) ++ && e[i].semantic_index == semantic_index) ++ return &e[i]; ++ } ++ ++ return NULL; ++} ++ ++static struct signature_element *find_signature_element_by_register_index( ++ const struct shader_signature *signature, unsigned int register_index) ++{ ++ struct signature_element *e = signature->elements; ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ if (e[i].register_index == register_index) ++ return &e[i]; ++ } ++ ++ return NULL; ++} ++ ++#define SM1_COLOR_REGISTER_OFFSET 8 ++ ++static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, ++ const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, ++ unsigned int register_index, bool is_dcl, unsigned int mask) ++{ ++ struct shader_signature *signature; ++ struct signature_element *element; ++ ++ if (output) ++ signature = &sm1->p.shader_desc.output_signature; ++ else ++ signature = &sm1->p.shader_desc.input_signature; ++ ++ if ((element = find_signature_element(signature, name, index))) ++ { ++ element->mask |= mask; ++ if (!is_dcl) ++ element->used_mask |= mask; ++ return true; ++ } ++ ++ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, ++ signature->element_count + 1, sizeof(*signature->elements))) ++ return false; ++ element = &signature->elements[signature->element_count++]; ++ ++ element->semantic_name = name; ++ element->semantic_index = index; ++ element->stream_index = 0; ++ 
element->sysval_semantic = sysval; ++ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ element->register_index = register_index; ++ element->target_location = register_index; ++ element->register_count = 1; ++ element->mask = mask; ++ element->used_mask = is_dcl ? 0 : mask; ++ element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; ++ ++ return true; ++} ++ ++static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, ++ unsigned int register_index, unsigned int mask) ++{ ++ struct shader_signature *signature; ++ struct signature_element *element; ++ ++ if (output) ++ signature = &sm1->p.shader_desc.output_signature; ++ else ++ signature = &sm1->p.shader_desc.input_signature; ++ ++ if (!(element = find_signature_element_by_register_index(signature, register_index))) ++ { ++ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC, ++ "%s register %u was used without being declared.", output ? "Output" : "Input", register_index); ++ return; ++ } ++ ++ element->used_mask |= mask; ++} ++ ++static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, ++ const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) ++{ ++ unsigned int register_index = reg->idx[0].offset; ++ ++ switch (reg->type) ++ { ++ case VKD3DSPR_TEMP: ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL ++ && sm1->p.shader_version.major == 1 && !register_index) ++ return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_NONE, 0, is_dcl, mask); ++ return true; ++ ++ case VKD3DSPR_INPUT: ++ /* For vertex shaders or sm3 pixel shaders, we should have already ++ * had a DCL instruction. Otherwise, this is a colour input. 
*/ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX || sm1->p.shader_version.major == 3) ++ { ++ add_signature_mask(sm1, false, register_index, mask); ++ return true; ++ } ++ return add_signature_element(sm1, false, "COLOR", register_index, ++ VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); ++ ++ case VKD3DSPR_TEXTURE: ++ /* For vertex shaders, this is ADDR. */ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) ++ return true; ++ return add_signature_element(sm1, false, "TEXCOORD", register_index, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); ++ ++ case VKD3DSPR_OUTPUT: ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) ++ { ++ /* For sm < 3 vertex shaders, this is TEXCRDOUT. ++ * ++ * For sm3 vertex shaders, this is OUTPUT, but we already ++ * should have had a DCL instruction. */ ++ if (sm1->p.shader_version.major == 3) ++ { ++ add_signature_mask(sm1, true, register_index, mask); ++ return true; ++ } ++ return add_signature_element(sm1, true, "TEXCOORD", register_index, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); ++ } ++ /* fall through */ ++ ++ case VKD3DSPR_ATTROUT: ++ return add_signature_element(sm1, true, "COLOR", register_index, ++ VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); ++ ++ case VKD3DSPR_COLOROUT: ++ return add_signature_element(sm1, true, "COLOR", register_index, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); ++ ++ case VKD3DSPR_DEPTHOUT: ++ return add_signature_element(sm1, true, "DEPTH", 0, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); ++ ++ case VKD3DSPR_RASTOUT: ++ switch (register_index) ++ { ++ case 0: ++ return add_signature_element(sm1, true, "POSITION", 0, ++ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); ++ ++ case 1: ++ return add_signature_element(sm1, true, "FOG", 0, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); ++ ++ case 2: ++ return add_signature_element(sm1, true, 
"PSIZE", 0, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); ++ ++ default: ++ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, ++ "Invalid rasterizer output index %u.", register_index); ++ return true; ++ } ++ ++ case VKD3DSPR_MISCTYPE: ++ switch (register_index) ++ { ++ case 0: ++ return add_signature_element(sm1, false, "VPOS", 0, ++ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); ++ ++ case 1: ++ return add_signature_element(sm1, false, "VFACE", 0, ++ VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1); ++ ++ default: ++ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, ++ "Invalid miscellaneous fragment input index %u.", register_index); ++ return true; ++ } ++ ++ default: ++ return true; ++ } ++} ++ ++static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, ++ const struct vkd3d_shader_semantic *semantic) ++{ ++ const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; ++ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; ++ unsigned int mask = semantic->resource.reg.write_mask; ++ bool output; ++ ++ static const char sm1_semantic_names[][13] = ++ { ++ [VKD3D_DECL_USAGE_POSITION ] = "POSITION", ++ [VKD3D_DECL_USAGE_BLEND_WEIGHT ] = "BLENDWEIGHT", ++ [VKD3D_DECL_USAGE_BLEND_INDICES] = "BLENDINDICES", ++ [VKD3D_DECL_USAGE_NORMAL ] = "NORMAL", ++ [VKD3D_DECL_USAGE_PSIZE ] = "PSIZE", ++ [VKD3D_DECL_USAGE_TEXCOORD ] = "TEXCOORD", ++ [VKD3D_DECL_USAGE_TANGENT ] = "TANGENT", ++ [VKD3D_DECL_USAGE_BINORMAL ] = "BINORMAL", ++ [VKD3D_DECL_USAGE_TESS_FACTOR ] = "TESSFACTOR", ++ [VKD3D_DECL_USAGE_POSITIONT ] = "POSITIONT", ++ [VKD3D_DECL_USAGE_COLOR ] = "COLOR", ++ [VKD3D_DECL_USAGE_FOG ] = "FOG", ++ [VKD3D_DECL_USAGE_DEPTH ] = "DEPTH", ++ [VKD3D_DECL_USAGE_SAMPLE ] = "SAMPLE", ++ }; ++ ++ if (reg->type == VKD3DSPR_OUTPUT) ++ output = true; ++ else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) ++ output = 
false; ++ else /* vpos and vface don't have a semantic. */ ++ return add_signature_element_from_register(sm1, reg, true, mask); ++ ++ /* sm2 pixel shaders use DCL but don't provide a semantic. */ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && sm1->p.shader_version.major == 2) ++ return add_signature_element_from_register(sm1, reg, true, mask); ++ ++ /* With the exception of vertex POSITION output, none of these are system ++ * values. Pixel POSITION input is not equivalent to SV_Position; the closer ++ * equivalent is VPOS, which is not declared as a semantic. */ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX ++ && output && semantic->usage == VKD3D_DECL_USAGE_POSITION) ++ sysval = VKD3D_SHADER_SV_POSITION; ++ ++ return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], ++ semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); ++} ++ ++static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, ++ enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) ++{ ++ struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; ++ ++ desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); ++ if (from_def) ++ { ++ /* d3d shaders have a maximum of 8192 constants; we should not overrun ++ * this array. 
*/ ++ assert((index / 32) < ARRAY_SIZE(sm1->constant_def_mask[set])); ++ bitmap_set(sm1->constant_def_mask[set], index); ++ } ++} ++ ++static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, ++ const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) ++{ ++ struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; ++ uint32_t register_index = reg->idx[0].offset; ++ ++ switch (reg->type) ++ { ++ case VKD3DSPR_TEMP: ++ desc->temp_count = max(desc->temp_count, register_index + 1); ++ break; ++ ++ case VKD3DSPR_CONST: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); ++ break; ++ ++ case VKD3DSPR_CONST2: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def); ++ break; ++ ++ case VKD3DSPR_CONST3: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def); ++ break; ++ ++ case VKD3DSPR_CONST4: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def); ++ break; ++ ++ case VKD3DSPR_CONSTINT: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); ++ break; ++ ++ case VKD3DSPR_CONSTBOOL: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, register_index, from_def); ++ break; ++ ++ default: ++ break; ++ } ++ ++ add_signature_element_from_register(sm1, reg, false, mask); ++} ++ + /* Read a parameter token from the input stream, and possibly a relative + * addressing token.
*/ + static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, +@@ -640,6 +946,8 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, + range = &semantic->resource.range; + range->space = 0; + range->first = range->last = semantic->resource.reg.reg.idx[0].offset; ++ ++ add_signature_element_from_semantic(sm1, semantic); + } + + static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, +@@ -744,6 +1052,14 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, + } + } + ++static unsigned int mask_from_swizzle(unsigned int swizzle) ++{ ++ return (1u << vkd3d_swizzle_get_component(swizzle, 0)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 1)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 2)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 3)); ++} ++ + static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) + { + struct vkd3d_shader_src_param *src_params, *predicate; +@@ -817,22 +1133,28 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_FLOAT); ++ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); + } + else if (ins->handler_idx == VKD3DSIH_DEFB) + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_SCALAR, VKD3D_DATA_UINT); ++ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); + } + else if (ins->handler_idx == VKD3DSIH_DEFI) + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_INT); ++ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); + } + else + { + /* Destination token */ + if (ins->dst_count) ++ { + 
shader_sm1_read_dst_param(sm1, &p, dst_param); ++ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, false); ++ } + + /* Predication token */ + if (ins->predicate) +@@ -840,7 +1162,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str + + /* Other source tokens */ + for (i = 0; i < ins->src_count; ++i) ++ { + shader_sm1_read_src_param(sm1, &p, &src_params[i]); ++ shader_sm1_scan_register(sm1, &src_params[i].reg, mask_from_swizzle(src_params[i].swizzle), false); ++ } + } + + if (sm1->abort) +@@ -947,12 +1272,30 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, + return VKD3D_OK; + } + ++static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, ++ enum vkd3d_shader_d3dbc_constant_register set) ++{ ++ unsigned int j; ++ ++ /* Find the highest constant index which is not written by a DEF ++ * instruction. We can't (easily) use an FFZ function for this since it ++ * needs to be limited by the highest used register index. */ ++ for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) ++ { ++ if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) ++ return j; ++ } ++ ++ return 0; ++} ++ + int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) + { + struct vkd3d_shader_instruction_array *instructions; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_sm1_parser *sm1; ++ unsigned int i; + int ret; + + if (!(sm1 = vkd3d_calloc(1, sizeof(*sm1)))) +@@ -992,6 +1335,9 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi + + *parser = &sm1->p; + ++ for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) ++ sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); ++ + return sm1->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; + } + +@@ -1340,7 +1686,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + else + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); +- put_u32(buffer, var->regs[r].bind_count); ++ put_u32(buffer, var->bind_count[r]); + } + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* FIXME: default value */ +@@ -1553,12 +1899,13 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + + for (i = 0; i < ctx->constant_defs.count; ++i) + { ++ const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; + uint32_t token = D3DSIO_DEF; + const struct sm1_dst_register reg = + { + .type = D3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, +- .reg = i, ++ .reg = constant_reg->index, + }; + + if (ctx->profile->major_version > 1) +@@ -1567,7 +1914,7 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + + write_sm1_dst_register(buffer, &reg); + for (x = 0; x < 4; ++x) +- put_f32(buffer, ctx->constant_defs.values[i].f[x]); ++ put_f32(buffer, constant_reg->value.f[x]); + } + } + +@@ -1686,14 +2033,19 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) + continue; + +- count = var->regs[HLSL_REGSET_SAMPLERS].bind_count; ++ count = var->bind_count[HLSL_REGSET_SAMPLERS]; + + for (i = 0; i < count; ++i) + { + if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + { + sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; +- assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); ++ if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC) ++ { ++ /* These can appear in sm4-style combined sample instructions.
*/ ++ hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); ++ continue; ++ } + + reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; + write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); +@@ -1844,6 +2196,35 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + } + ++static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++{ ++ const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); ++ ++ switch (jump->type) ++ { ++ case HLSL_IR_JUMP_DISCARD_NEG: ++ { ++ struct hlsl_reg *reg = &jump->condition.node->reg; ++ ++ struct sm1_instruction sm1_instr = ++ { ++ .opcode = VKD3D_SM1_OP_TEXKILL, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.reg = reg->id, ++ .dst.writemask = reg->writemask, ++ .has_dst = 1, ++ }; ++ ++ write_sm1_instruction(ctx, buffer, &sm1_instr); ++ break; ++ } ++ ++ default: ++ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); ++ } ++} ++ + static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_load *load = hlsl_ir_load(instr); +@@ -2038,6 +2419,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + write_sm1_expr(ctx, buffer, instr); + break; + ++ case HLSL_IR_JUMP: ++ write_sm1_jump(ctx, buffer, instr); ++ break; ++ + case HLSL_IR_LOAD: + write_sm1_load(ctx, buffer, instr); + break; +@@ -2063,7 +2448,6 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) + { + struct vkd3d_bytecode_buffer buffer = {0}; +- int ret; + + put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + +@@ -2076,10 +2460,17 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun + +
put_u32(&buffer, D3DSIO_END); + +- if (!(ret = buffer.status)) ++ if (buffer.status) ++ ctx->result = buffer.status; ++ ++ if (!ctx->result) + { + out->code = buffer.data; + out->size = buffer.size; + } +- return ret; ++ else ++ { ++ vkd3d_free(buffer.data); ++ } ++ return ctx->result; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +index 3e3f06faeb5..cedc3da4a83 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +@@ -391,6 +391,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s + read_dword(&ptr, &e[i].sysval_semantic); + read_dword(&ptr, &e[i].component_type); + read_dword(&ptr, &e[i].register_index); ++ e[i].target_location = e[i].register_index; + e[i].register_count = 1; + read_dword(&ptr, &mask); + e[i].mask = mask & 0xff; +@@ -493,8 +494,14 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, + return ret; + break; + ++ case TAG_DXIL: + case TAG_SHDR: + case TAG_SHEX: ++ if ((section->tag == TAG_DXIL) != desc->is_dxil) ++ { ++ TRACE("Skipping chunk %#x.\n", section->tag); ++ break; ++ } + if (desc->byte_code) + FIXME("Multiple shader code chunks.\n"); + desc->byte_code = section->data.code; +@@ -505,10 +512,6 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, + TRACE("Skipping AON9 shader code chunk.\n"); + break; + +- case TAG_DXIL: +- FIXME("Skipping DXIL shader model 6+ code chunk.\n"); +- break; +- + default: + TRACE("Skipping chunk %#x.\n", section->tag); + break; +@@ -529,12 +532,6 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, + { + int ret; + +- desc->byte_code = NULL; +- desc->byte_code_size = 0; +- memset(&desc->input_signature, 0, sizeof(desc->input_signature)); +- memset(&desc->output_signature, 0, sizeof(desc->output_signature)); +- memset(&desc->patch_constant_signature, 0, sizeof(desc->patch_constant_signature)); +- + ret = 
for_each_dxbc_section(dxbc, message_context, source_name, shdr_handler, desc); + if (!desc->byte_code) + ret = VKD3D_ERROR_INVALID_ARGUMENT; +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +new file mode 100644 +index 00000000000..f9efe47f95d +--- /dev/null ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -0,0 +1,2370 @@ ++/* ++ * Copyright 2023 Conor McCarthy for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#include "vkd3d_shader_private.h" ++ ++#define VKD3D_SM6_VERSION_MAJOR(version) (((version) >> 4) & 0xf) ++#define VKD3D_SM6_VERSION_MINOR(version) (((version) >> 0) & 0xf) ++ ++#define BITCODE_MAGIC VKD3D_MAKE_TAG('B', 'C', 0xc0, 0xde) ++ ++enum bitcode_block_id ++{ ++ BLOCKINFO_BLOCK = 0, ++ MODULE_BLOCK = 8, ++ PARAMATTR_BLOCK = 9, ++ PARAMATTR_GROUP_BLOCK = 10, ++ CONSTANTS_BLOCK = 11, ++ FUNCTION_BLOCK = 12, ++ VALUE_SYMTAB_BLOCK = 14, ++ METADATA_BLOCK = 15, ++ METADATA_ATTACHMENT_BLOCK = 16, ++ TYPE_BLOCK = 17, ++ USELIST_BLOCK = 18, ++}; ++ ++enum bitcode_blockinfo_code ++{ ++ SETBID = 1, ++ BLOCKNAME = 2, ++ SETRECORDNAME = 3, ++}; ++ ++enum bitcode_block_abbreviation ++{ ++ END_BLOCK = 0, ++ ENTER_SUBBLOCK = 1, ++ DEFINE_ABBREV = 2, ++ UNABBREV_RECORD = 3, ++}; ++ ++enum 
bitcode_abbrev_type ++{ ++ ABBREV_FIXED = 1, ++ ABBREV_VBR = 2, ++ ABBREV_ARRAY = 3, ++ ABBREV_CHAR = 4, ++ ABBREV_BLOB = 5, ++}; ++ ++enum bitcode_address_space ++{ ++ ADDRESS_SPACE_DEFAULT, ++ ADDRESS_SPACE_DEVICEMEM, ++ ADDRESS_SPACE_CBUFFER, ++ ADDRESS_SPACE_GROUPSHARED, ++}; ++ ++enum bitcode_module_code ++{ ++ MODULE_CODE_VERSION = 1, ++ MODULE_CODE_GLOBALVAR = 7, ++ MODULE_CODE_FUNCTION = 8, ++}; ++ ++enum bitcode_constant_code ++{ ++ CST_CODE_SETTYPE = 1, ++ CST_CODE_NULL = 2, ++ CST_CODE_UNDEF = 3, ++ CST_CODE_INTEGER = 4, ++ CST_CODE_FLOAT = 6, ++ CST_CODE_STRING = 8, ++ CST_CODE_CE_GEP = 12, ++ CST_CODE_CE_INBOUNDS_GEP = 20, ++ CST_CODE_DATA = 22, ++}; ++ ++enum bitcode_function_code ++{ ++ FUNC_CODE_DECLAREBLOCKS = 1, ++ FUNC_CODE_INST_BINOP = 2, ++ FUNC_CODE_INST_CAST = 3, ++ FUNC_CODE_INST_RET = 10, ++ FUNC_CODE_INST_BR = 11, ++ FUNC_CODE_INST_SWITCH = 12, ++ FUNC_CODE_INST_PHI = 16, ++ FUNC_CODE_INST_ALLOCA = 19, ++ FUNC_CODE_INST_LOAD = 20, ++ FUNC_CODE_INST_EXTRACTVAL = 26, ++ FUNC_CODE_INST_CMP2 = 28, ++ FUNC_CODE_INST_VSELECT = 29, ++ FUNC_CODE_INST_CALL = 34, ++ FUNC_CODE_INST_ATOMICRMW = 38, ++ FUNC_CODE_INST_LOADATOMIC = 41, ++ FUNC_CODE_INST_GEP = 43, ++ FUNC_CODE_INST_STORE = 44, ++ FUNC_CODE_INST_STOREATOMIC = 45, ++ FUNC_CODE_INST_CMPXCHG = 46, ++}; ++ ++enum bitcode_type_code ++{ ++ TYPE_CODE_NUMENTRY = 1, ++ TYPE_CODE_VOID = 2, ++ TYPE_CODE_FLOAT = 3, ++ TYPE_CODE_DOUBLE = 4, ++ TYPE_CODE_LABEL = 5, ++ TYPE_CODE_INTEGER = 7, ++ TYPE_CODE_POINTER = 8, ++ TYPE_CODE_HALF = 10, ++ TYPE_CODE_ARRAY = 11, ++ TYPE_CODE_VECTOR = 12, ++ TYPE_CODE_METADATA = 16, ++ TYPE_CODE_STRUCT_ANON = 18, ++ TYPE_CODE_STRUCT_NAME = 19, ++ TYPE_CODE_STRUCT_NAMED = 20, ++ TYPE_CODE_FUNCTION = 21, ++}; ++ ++enum bitcode_value_symtab_code ++{ ++ VST_CODE_ENTRY = 1, ++ VST_CODE_BBENTRY = 2, ++}; ++ ++struct sm6_pointer_info ++{ ++ const struct sm6_type *type; ++ enum bitcode_address_space addr_space; ++}; ++ ++struct sm6_struct_info ++{ ++ const char *name; ++ 
unsigned int elem_count; ++ const struct sm6_type *elem_types[]; ++}; ++ ++struct sm6_function_info ++{ ++ const struct sm6_type *ret_type; ++ unsigned int param_count; ++ const struct sm6_type *param_types[]; ++}; ++ ++struct sm6_array_info ++{ ++ unsigned int count; ++ const struct sm6_type *elem_type; ++}; ++ ++enum sm6_type_class ++{ ++ TYPE_CLASS_VOID, ++ TYPE_CLASS_INTEGER, ++ TYPE_CLASS_FLOAT, ++ TYPE_CLASS_POINTER, ++ TYPE_CLASS_STRUCT, ++ TYPE_CLASS_FUNCTION, ++ TYPE_CLASS_VECTOR, ++ TYPE_CLASS_ARRAY, ++ TYPE_CLASS_LABEL, ++ TYPE_CLASS_METADATA, ++}; ++ ++struct sm6_type ++{ ++ enum sm6_type_class class; ++ union ++ { ++ unsigned int width; ++ struct sm6_pointer_info pointer; ++ struct sm6_struct_info *struc; ++ struct sm6_function_info *function; ++ struct sm6_array_info array; ++ } u; ++}; ++ ++enum sm6_value_type ++{ ++ VALUE_TYPE_FUNCTION, ++ VALUE_TYPE_REG, ++}; ++ ++struct sm6_function_data ++{ ++ const char *name; ++ bool is_prototype; ++ unsigned int attribs_id; ++}; ++ ++struct sm6_value ++{ ++ const struct sm6_type *type; ++ enum sm6_value_type value_type; ++ bool is_undefined; ++ union ++ { ++ struct sm6_function_data function; ++ struct vkd3d_shader_register reg; ++ } u; ++}; ++ ++struct dxil_record ++{ ++ unsigned int code; ++ unsigned int operand_count; ++ uint64_t operands[]; ++}; ++ ++struct sm6_symbol ++{ ++ unsigned int id; ++ const char *name; ++}; ++ ++struct sm6_block ++{ ++ struct vkd3d_shader_instruction *instructions; ++ size_t instruction_capacity; ++ size_t instruction_count; ++}; ++ ++struct sm6_function ++{ ++ const struct sm6_value *declaration; ++ ++ struct sm6_block *blocks[1]; ++ size_t block_count; ++}; ++ ++struct dxil_block ++{ ++ const struct dxil_block *parent; ++ enum bitcode_block_id id; ++ unsigned int abbrev_len; ++ unsigned int start; ++ unsigned int length; ++ unsigned int level; ++ ++ /* The abbrev, block and record structs are not relocatable. 
*/ ++ struct dxil_abbrev **abbrevs; ++ size_t abbrev_capacity; ++ size_t abbrev_count; ++ unsigned int blockinfo_bid; ++ bool has_bid; ++ ++ struct dxil_block **child_blocks; ++ size_t child_block_capacity; ++ size_t child_block_count; ++ ++ struct dxil_record **records; ++ size_t record_capacity; ++ size_t record_count; ++}; ++ ++struct sm6_parser ++{ ++ const uint32_t *ptr, *start, *end; ++ unsigned int bitpos; ++ ++ struct dxil_block root_block; ++ struct dxil_block *current_block; ++ ++ struct dxil_global_abbrev **abbrevs; ++ size_t abbrev_capacity; ++ size_t abbrev_count; ++ ++ struct sm6_type *types; ++ size_t type_count; ++ ++ struct sm6_symbol *global_symbols; ++ size_t global_symbol_count; ++ ++ struct sm6_function *functions; ++ size_t function_count; ++ ++ struct sm6_value *values; ++ size_t value_count; ++ size_t value_capacity; ++ ++ struct vkd3d_shader_parser p; ++}; ++ ++struct dxil_abbrev_operand ++{ ++ uint64_t context; ++ bool (*read_operand)(struct sm6_parser *sm6, uint64_t context, uint64_t *operand); ++}; ++ ++struct dxil_abbrev ++{ ++ unsigned int count; ++ bool is_array; ++ struct dxil_abbrev_operand operands[]; ++}; ++ ++struct dxil_global_abbrev ++{ ++ unsigned int block_id; ++ struct dxil_abbrev abbrev; ++}; ++ ++static size_t size_add_with_overflow_check(size_t a, size_t b) ++{ ++ size_t i = a + b; ++ return (i < a) ? 
SIZE_MAX : i; ++} ++ ++static struct sm6_parser *sm6_parser(struct vkd3d_shader_parser *parser) ++{ ++ return CONTAINING_RECORD(parser, struct sm6_parser, p); ++} ++ ++static bool sm6_parser_is_end(struct sm6_parser *sm6) ++{ ++ return sm6->ptr == sm6->end; ++} ++ ++static uint32_t sm6_parser_read_uint32(struct sm6_parser *sm6) ++{ ++ if (sm6_parser_is_end(sm6)) ++ { ++ sm6->p.failed = true; ++ return 0; ++ } ++ return *sm6->ptr++; ++} ++ ++static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length) ++{ ++ unsigned int l, prev_len = 0; ++ uint32_t bits; ++ ++ if (!length) ++ return 0; ++ ++ assert(length < 32); ++ ++ if (sm6_parser_is_end(sm6)) ++ { ++ sm6->p.failed = true; ++ return 0; ++ } ++ ++ assert(sm6->bitpos < 32); ++ bits = *sm6->ptr >> sm6->bitpos; ++ l = 32 - sm6->bitpos; ++ if (l <= length) ++ { ++ ++sm6->ptr; ++ if (sm6_parser_is_end(sm6) && l < length) ++ { ++ sm6->p.failed = true; ++ return bits; ++ } ++ sm6->bitpos = 0; ++ bits |= (l < length) ? *sm6->ptr << l : 0; ++ prev_len = l; ++ } ++ sm6->bitpos += length - prev_len; ++ ++ return bits & ((1u << length) - 1); ++} ++ ++static uint64_t sm6_parser_read_vbr(struct sm6_parser *sm6, unsigned int length) ++{ ++ unsigned int bits, flag, mask, shift = 0; ++ uint64_t result = 0; ++ ++ if (!length) ++ return 0; ++ ++ if (sm6_parser_is_end(sm6)) ++ { ++ sm6->p.failed = true; ++ return 0; ++ } ++ ++ flag = 1 << (length - 1); ++ mask = flag - 1; ++ do ++ { ++ bits = sm6_parser_read_bits(sm6, length); ++ result |= (uint64_t)(bits & mask) << shift; ++ shift += length - 1; ++ } while ((bits & flag) && !sm6->p.failed && shift < 64); ++ ++ sm6->p.failed |= !!(bits & flag); ++ ++ return result; ++} ++ ++static void sm6_parser_align_32(struct sm6_parser *sm6) ++{ ++ if (!sm6->bitpos) ++ return; ++ ++ if (sm6_parser_is_end(sm6)) ++ { ++ sm6->p.failed = true; ++ return; ++ } ++ ++ ++sm6->ptr; ++ sm6->bitpos = 0; ++} ++ ++static bool dxil_block_handle_blockinfo_record(struct dxil_block *block, struct dxil_record
*record) ++{ ++ /* BLOCKINFO blocks must only occur immediately below the module root block. */ ++ if (block->level > 1) ++ { ++ WARN("Invalid blockinfo block level %u.\n", block->level); ++ return false; ++ } ++ ++ switch (record->code) ++ { ++ case SETBID: ++ if (!record->operand_count) ++ { ++ WARN("Missing id operand.\n"); ++ return false; ++ } ++ if (record->operands[0] > UINT_MAX) ++ WARN("Truncating block id %"PRIu64".\n", record->operands[0]); ++ block->blockinfo_bid = record->operands[0]; ++ block->has_bid = true; ++ break; ++ case BLOCKNAME: ++ case SETRECORDNAME: ++ break; ++ default: ++ FIXME("Unhandled BLOCKINFO record type %u.\n", record->code); ++ break; ++ } ++ ++ return true; ++} ++ ++static enum vkd3d_result dxil_block_add_record(struct dxil_block *block, struct dxil_record *record) ++{ ++ unsigned int reserve; ++ ++ switch (block->id) ++ { ++ /* Rough initial reserve sizes for small shaders. */ ++ case CONSTANTS_BLOCK: reserve = 32; break; ++ case FUNCTION_BLOCK: reserve = 128; break; ++ case METADATA_BLOCK: reserve = 32; break; ++ case TYPE_BLOCK: reserve = 32; break; ++ default: reserve = 8; break; ++ } ++ reserve = max(reserve, block->record_count + 1); ++ if (!vkd3d_array_reserve((void **)&block->records, &block->record_capacity, reserve, sizeof(*block->records))) ++ { ++ ERR("Failed to allocate %u records.\n", reserve); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if (block->id == BLOCKINFO_BLOCK && !dxil_block_handle_blockinfo_record(block, record)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ block->records[block->record_count++] = record; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result sm6_parser_read_unabbrev_record(struct sm6_parser *sm6) ++{ ++ struct dxil_block *block = sm6->current_block; ++ enum vkd3d_result ret = VKD3D_OK; ++ unsigned int code, count, i; ++ struct dxil_record *record; ++ ++ code = sm6_parser_read_vbr(sm6, 6); ++ ++ count = sm6_parser_read_vbr(sm6, 6); ++ if (!(record = vkd3d_malloc(sizeof(*record) + 
count * sizeof(record->operands[0])))) ++ { ++ ERR("Failed to allocate record with %u operands.\n", count); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ record->code = code; ++ record->operand_count = count; ++ ++ for (i = 0; i < count; ++i) ++ record->operands[i] = sm6_parser_read_vbr(sm6, 6); ++ if (sm6->p.failed) ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ ++ if (ret < 0 || (ret = dxil_block_add_record(block, record)) < 0) ++ vkd3d_free(record); ++ ++ return ret; ++} ++ ++static bool sm6_parser_read_literal_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) ++{ ++ *op = context; ++ return !sm6->p.failed; ++} ++ ++static bool sm6_parser_read_fixed_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) ++{ ++ *op = sm6_parser_read_bits(sm6, context); ++ return !sm6->p.failed; ++} ++ ++static bool sm6_parser_read_vbr_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) ++{ ++ *op = sm6_parser_read_vbr(sm6, context); ++ return !sm6->p.failed; ++} ++ ++static bool sm6_parser_read_char6_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) ++{ ++ *op = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"[sm6_parser_read_bits(sm6, 6)]; ++ return !sm6->p.failed; ++} ++ ++static bool sm6_parser_read_blob_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) ++{ ++ int count = sm6_parser_read_vbr(sm6, 6); ++ sm6_parser_align_32(sm6); ++ for (; count > 0; count -= 4) ++ sm6_parser_read_uint32(sm6); ++ FIXME("Unhandled blob operand.\n"); ++ return false; ++} ++ ++static enum vkd3d_result dxil_abbrev_init(struct dxil_abbrev *abbrev, unsigned int count, struct sm6_parser *sm6) ++{ ++ enum bitcode_abbrev_type prev_type, type; ++ unsigned int i; ++ ++ abbrev->is_array = false; ++ ++ for (i = 0, prev_type = 0; i < count && !sm6->p.failed; ++i) ++ { ++ if (sm6_parser_read_bits(sm6, 1)) ++ { ++ if (prev_type == ABBREV_ARRAY) ++ { ++ WARN("Unexpected literal abbreviation after array.\n"); ++ return 
VKD3D_ERROR_INVALID_SHADER; ++ } ++ abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 8); ++ abbrev->operands[i].read_operand = sm6_parser_read_literal_operand; ++ continue; ++ } ++ ++ switch (type = sm6_parser_read_bits(sm6, 3)) ++ { ++ case ABBREV_FIXED: ++ case ABBREV_VBR: ++ abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 5); ++ abbrev->operands[i].read_operand = (type == ABBREV_FIXED) ? sm6_parser_read_fixed_operand ++ : sm6_parser_read_vbr_operand; ++ break; ++ ++ case ABBREV_ARRAY: ++ if (prev_type == ABBREV_ARRAY || i != count - 2) ++ { ++ WARN("Unexpected array abbreviation.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ abbrev->is_array = true; ++ --i; ++ --count; ++ break; ++ ++ case ABBREV_CHAR: ++ abbrev->operands[i].read_operand = sm6_parser_read_char6_operand; ++ break; ++ ++ case ABBREV_BLOB: ++ if (prev_type == ABBREV_ARRAY || i != count - 1) ++ { ++ WARN("Unexpected blob abbreviation.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ abbrev->operands[i].read_operand = sm6_parser_read_blob_operand; ++ break; ++ } ++ ++ prev_type = type; ++ } ++ ++ abbrev->count = count; ++ ++ return sm6->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; ++} ++ ++static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) ++{ ++ struct dxil_block *block = sm6->current_block; ++ unsigned int count = sm6_parser_read_vbr(sm6, 5); ++ struct dxil_global_abbrev *global_abbrev; ++ enum vkd3d_result ret; ++ ++ assert(block->id == BLOCKINFO_BLOCK); ++ ++ if (!block->has_bid) ++ { ++ WARN("Missing blockinfo block id.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (block->blockinfo_bid == MODULE_BLOCK) ++ { ++ FIXME("Unhandled global abbreviation for module block.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) ++ || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) ++ { ++ ERR("Failed to allocate global abbreviation.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if ((ret = dxil_abbrev_init(&global_abbrev->abbrev, count, sm6)) < 0) ++ { ++ vkd3d_free(global_abbrev); ++ return ret; ++ } ++ global_abbrev->block_id = block->blockinfo_bid; ++ ++ sm6->abbrevs[sm6->abbrev_count++] = global_abbrev; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result sm6_parser_add_block_abbrev(struct sm6_parser *sm6) ++{ ++ struct dxil_block *block = sm6->current_block; ++ struct dxil_abbrev *abbrev; ++ enum vkd3d_result ret; ++ unsigned int count; ++ ++ if (block->id == BLOCKINFO_BLOCK) ++ return sm6_parser_add_global_abbrev(sm6); ++ ++ count = sm6_parser_read_vbr(sm6, 5); ++ if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, block->abbrev_count + 1, sizeof(*block->abbrevs)) ++ || !(abbrev = vkd3d_malloc(sizeof(*abbrev) + count * sizeof(abbrev->operands[0])))) ++ { ++ ERR("Failed to allocate block abbreviation.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if ((ret = dxil_abbrev_init(abbrev, count, sm6)) < 0) ++ { ++ vkd3d_free(abbrev); ++ return ret; ++ } ++ ++
block->abbrevs[block->abbrev_count++] = abbrev; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result sm6_parser_read_abbrev_record(struct sm6_parser *sm6, unsigned int abbrev_id) ++{ ++ enum vkd3d_result ret = VKD3D_ERROR_INVALID_SHADER; ++ struct dxil_block *block = sm6->current_block; ++ struct dxil_record *temp, *record; ++ unsigned int i, count, array_len; ++ struct dxil_abbrev *abbrev; ++ uint64_t code; ++ ++ if (abbrev_id >= block->abbrev_count) ++ { ++ WARN("Invalid abbreviation id %u.\n", abbrev_id); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ abbrev = block->abbrevs[abbrev_id]; ++ if (!(count = abbrev->count)) ++ return VKD3D_OK; ++ if (count == 1 && abbrev->is_array) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ /* First operand is the record code. The array is included in the count, but will be done separately. */ ++ count -= abbrev->is_array + 1; ++ if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) ++ { ++ ERR("Failed to allocate record with %u operands.\n", count); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if (!abbrev->operands[0].read_operand(sm6, abbrev->operands[0].context, &code)) ++ goto fail; ++ if (code > UINT_MAX) ++ FIXME("Truncating 64-bit record code %#"PRIx64".\n", code); ++ record->code = code; ++ ++ for (i = 0; i < count; ++i) ++ if (!abbrev->operands[i + 1].read_operand(sm6, abbrev->operands[i + 1].context, &record->operands[i])) ++ goto fail; ++ record->operand_count = count; ++ ++ /* An array can occur only as the last operand. 
*/ ++ if (abbrev->is_array) ++ { ++ array_len = sm6_parser_read_vbr(sm6, 6); ++ if (!(temp = vkd3d_realloc(record, sizeof(*record) + (count + array_len) * sizeof(record->operands[0])))) ++ { ++ ERR("Failed to allocate record with %u operands.\n", count + array_len); ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ goto fail; ++ } ++ record = temp; ++ ++ for (i = 0; i < array_len; ++i) ++ { ++ if (!abbrev->operands[count + 1].read_operand(sm6, abbrev->operands[count + 1].context, ++ &record->operands[count + i])) ++ { ++ goto fail; ++ } ++ } ++ record->operand_count += array_len; ++ } ++ ++ if ((ret = dxil_block_add_record(block, record)) < 0) ++ goto fail; ++ ++ return VKD3D_OK; ++ ++fail: ++ vkd3d_free(record); ++ return ret; ++} ++ ++static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, ++ struct sm6_parser *sm6); ++ ++static enum vkd3d_result dxil_block_read(struct dxil_block *parent, struct sm6_parser *sm6) ++{ ++ unsigned int reserve = (parent->id == MODULE_BLOCK) ? 
12 : 2; ++ struct dxil_block *block; ++ enum vkd3d_result ret; ++ ++ sm6->current_block = parent; ++ ++ do ++ { ++ unsigned int abbrev_id = sm6_parser_read_bits(sm6, parent->abbrev_len); ++ ++ switch (abbrev_id) ++ { ++ case END_BLOCK: ++ sm6_parser_align_32(sm6); ++ return VKD3D_OK; ++ ++ case ENTER_SUBBLOCK: ++ if (parent->id != MODULE_BLOCK && parent->id != FUNCTION_BLOCK) ++ { ++ WARN("Invalid subblock parent id %u.\n", parent->id); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!vkd3d_array_reserve((void **)&parent->child_blocks, &parent->child_block_capacity, ++ max(reserve, parent->child_block_count + 1), sizeof(*parent->child_blocks)) ++ || !(block = vkd3d_calloc(1, sizeof(*block)))) ++ { ++ ERR("Failed to allocate block.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if ((ret = dxil_block_init(block, parent, sm6)) < 0) ++ { ++ vkd3d_free(block); ++ return ret; ++ } ++ ++ parent->child_blocks[parent->child_block_count++] = block; ++ sm6->current_block = parent; ++ break; ++ ++ case DEFINE_ABBREV: ++ if ((ret = sm6_parser_add_block_abbrev(sm6)) < 0) ++ return ret; ++ break; ++ ++ case UNABBREV_RECORD: ++ if ((ret = sm6_parser_read_unabbrev_record(sm6)) < 0) ++ { ++ WARN("Failed to read unabbreviated record.\n"); ++ return ret; ++ } ++ break; ++ ++ default: ++ if ((ret = sm6_parser_read_abbrev_record(sm6, abbrev_id - 4)) < 0) ++ { ++ WARN("Failed to read abbreviated record.\n"); ++ return ret; ++ } ++ break; ++ } ++ } while (!sm6->p.failed); ++ ++ return VKD3D_ERROR_INVALID_SHADER; ++} ++ ++static size_t sm6_parser_compute_global_abbrev_count_for_block_id(struct sm6_parser *sm6, ++ unsigned int block_id) ++{ ++ size_t i, count; ++ ++ for (i = 0, count = 0; i < sm6->abbrev_count; ++i) ++ count += sm6->abbrevs[i]->block_id == block_id; ++ ++ return count; ++} ++ ++static void dxil_block_destroy(struct dxil_block *block) ++{ ++ size_t i; ++ ++ for (i = 0; i < block->record_count; ++i) ++ vkd3d_free(block->records[i]); ++ vkd3d_free(block->records); 
++ ++ for (i = 0; i < block->child_block_count; ++i) ++ { ++ dxil_block_destroy(block->child_blocks[i]); ++ vkd3d_free(block->child_blocks[i]); ++ } ++ vkd3d_free(block->child_blocks); ++ ++ block->records = NULL; ++ block->record_count = 0; ++ block->child_blocks = NULL; ++ block->child_block_count = 0; ++} ++ ++static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, ++ struct sm6_parser *sm6) ++{ ++ size_t i, abbrev_count = 0; ++ enum vkd3d_result ret; ++ ++ block->parent = parent; ++ block->level = parent ? parent->level + 1 : 0; ++ block->id = sm6_parser_read_vbr(sm6, 8); ++ block->abbrev_len = sm6_parser_read_vbr(sm6, 4); ++ sm6_parser_align_32(sm6); ++ block->length = sm6_parser_read_uint32(sm6); ++ block->start = sm6->ptr - sm6->start; ++ ++ if (sm6->p.failed) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if ((block->abbrev_count = sm6_parser_compute_global_abbrev_count_for_block_id(sm6, block->id))) ++ { ++ if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, ++ block->abbrev_count, sizeof(*block->abbrevs))) ++ { ++ ERR("Failed to allocate block abbreviations.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ for (i = 0; i < sm6->abbrev_count; ++i) ++ if (sm6->abbrevs[i]->block_id == block->id) ++ block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; ++ ++ assert(abbrev_count == block->abbrev_count); ++ } ++ ++ if ((ret = dxil_block_read(block, sm6)) < 0) ++ dxil_block_destroy(block); ++ ++ for (i = abbrev_count; i < block->abbrev_count; ++i) ++ vkd3d_free(block->abbrevs[i]); ++ vkd3d_free(block->abbrevs); ++ block->abbrevs = NULL; ++ block->abbrev_count = 0; ++ ++ return ret; ++} ++ ++static size_t dxil_block_compute_function_count(const struct dxil_block *root) ++{ ++ size_t i, count; ++ ++ for (i = 0, count = 0; i < root->child_block_count; ++i) ++ count += root->child_blocks[i]->id == FUNCTION_BLOCK; ++ ++ return count; ++} ++ ++static size_t 
dxil_block_compute_module_decl_count(const struct dxil_block *block) ++{ ++ size_t i, count; ++ ++ for (i = 0, count = 0; i < block->record_count; ++i) ++ count += block->records[i]->code == MODULE_CODE_FUNCTION; ++ return count; ++} ++ ++static size_t dxil_block_compute_constants_count(const struct dxil_block *block) ++{ ++ size_t i, count; ++ ++ for (i = 0, count = 0; i < block->record_count; ++i) ++ count += block->records[i]->code != CST_CODE_SETTYPE; ++ return count; ++} ++ ++static void dxil_global_abbrevs_cleanup(struct dxil_global_abbrev **abbrevs, size_t count) ++{ ++ size_t i; ++ ++ for (i = 0; i < count; ++i) ++ vkd3d_free(abbrevs[i]); ++ vkd3d_free(abbrevs); ++} ++ ++static const struct dxil_block *sm6_parser_get_level_one_block(const struct sm6_parser *sm6, ++ enum bitcode_block_id id, bool *is_unique) ++{ ++ const struct dxil_block *block, *found = NULL; ++ size_t i; ++ ++ for (i = 0, *is_unique = true; i < sm6->root_block.child_block_count; ++i) ++ { ++ block = sm6->root_block.child_blocks[i]; ++ if (block->id != id) ++ continue; ++ ++ if (!found) ++ found = block; ++ else ++ *is_unique = false; ++ } ++ ++ return found; ++} ++ ++static char *dxil_record_to_string(const struct dxil_record *record, unsigned int offset) ++{ ++ unsigned int i; ++ char *str; ++ ++ assert(offset <= record->operand_count); ++ if (!(str = vkd3d_calloc(record->operand_count - offset + 1, 1))) ++ return NULL; ++ ++ for (i = offset; i < record->operand_count; ++i) ++ str[i - offset] = record->operands[i]; ++ ++ return str; ++} ++ ++static bool dxil_record_validate_operand_min_count(const struct dxil_record *record, unsigned int min_count, ++ struct sm6_parser *sm6) ++{ ++ if (record->operand_count >= min_count) ++ return true; ++ ++ WARN("Invalid operand count %u for code %u.\n", record->operand_count, record->code); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, ++ "Invalid operand count %u for record code %u.", record->operand_count, 
record->code); ++ return false; ++} ++ ++static void dxil_record_validate_operand_max_count(const struct dxil_record *record, unsigned int max_count, ++ struct sm6_parser *sm6) ++{ ++ if (record->operand_count <= max_count) ++ return; ++ ++ WARN("Ignoring %u extra operands for code %u.\n", record->operand_count - max_count, record->code); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Ignoring %u extra operands for record code %u.", record->operand_count - max_count, record->code); ++} ++ ++static bool dxil_record_validate_operand_count(const struct dxil_record *record, unsigned int min_count, ++ unsigned int max_count, struct sm6_parser *sm6) ++{ ++ dxil_record_validate_operand_max_count(record, max_count, sm6); ++ return dxil_record_validate_operand_min_count(record, min_count, sm6); ++} ++ ++static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) ++{ ++ const struct dxil_record *record; ++ size_t i, type_count, type_index; ++ const struct dxil_block *block; ++ char *struct_name = NULL; ++ unsigned int j, count; ++ struct sm6_type *type; ++ uint64_t type_id; ++ bool is_unique; ++ ++ sm6->p.location.line = 0; ++ sm6->p.location.column = 0; ++ ++ if (!(block = sm6_parser_get_level_one_block(sm6, TYPE_BLOCK, &is_unique))) ++ { ++ WARN("No type definitions found.\n"); ++ return VKD3D_OK; ++ } ++ if (!is_unique) ++ WARN("Ignoring invalid extra type table(s).\n"); ++ ++ sm6->p.location.line = block->id; ++ ++ type_count = 0; ++ for (i = 0; i < block->record_count; ++i) ++ type_count += block->records[i]->code != TYPE_CODE_NUMENTRY && block->records[i]->code != TYPE_CODE_STRUCT_NAME; ++ ++ /* The type array must not be relocated. 
*/ ++ if (!(sm6->types = vkd3d_calloc(type_count, sizeof(*sm6->types)))) ++ { ++ ERR("Failed to allocate type array.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ for (i = 0; i < block->record_count; ++i) ++ { ++ sm6->p.location.column = i; ++ record = block->records[i]; ++ ++ type = &sm6->types[sm6->type_count]; ++ type_index = sm6->type_count; ++ ++ switch (record->code) ++ { ++ case TYPE_CODE_ARRAY: ++ case TYPE_CODE_VECTOR: ++ if (!dxil_record_validate_operand_count(record, 2, 2, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ type->class = record->code == TYPE_CODE_ARRAY ? TYPE_CLASS_ARRAY : TYPE_CLASS_VECTOR; ++ ++ if (!(type->u.array.count = record->operands[0])) ++ { ++ TRACE("Setting unbounded for type %zu.\n", type_index); ++ type->u.array.count = UINT_MAX; ++ } ++ ++ if ((type_id = record->operands[1]) >= type_count) ++ { ++ WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ type->u.array.elem_type = &sm6->types[type_id]; ++ break; ++ ++ case TYPE_CODE_DOUBLE: ++ dxil_record_validate_operand_max_count(record, 0, sm6); ++ type->class = TYPE_CLASS_FLOAT; ++ type->u.width = 64; ++ break; ++ ++ case TYPE_CODE_FLOAT: ++ dxil_record_validate_operand_max_count(record, 0, sm6); ++ type->class = TYPE_CLASS_FLOAT; ++ type->u.width = 32; ++ break; ++ ++ case TYPE_CODE_FUNCTION: ++ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ if (record->operands[0]) ++ FIXME("Unhandled vararg function type %zu.\n", type_index); ++ ++ type->class = TYPE_CLASS_FUNCTION; ++ ++ if ((type_id = record->operands[1]) >= type_count) ++ { ++ WARN("Invalid return type id %"PRIu64" for type %zu.\n", type_id, type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ count = record->operand_count - 2; ++ if (vkd3d_object_range_overflow(sizeof(type->u.function), count, sizeof(type->u.function->param_types[0])) ++ || !(type->u.function = 
vkd3d_malloc(offsetof(struct sm6_function_info, param_types[count])))) ++ { ++ ERR("Failed to allocate function parameter types.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ type->u.function->ret_type = &sm6->types[type_id]; ++ type->u.function->param_count = count; ++ for (j = 0; j < count; ++j) ++ { ++ if ((type_id = record->operands[j + 2]) >= type_count) ++ { ++ WARN("Invalid parameter type id %"PRIu64" for type %zu.\n", type_id, type_index); ++ vkd3d_free(type->u.function); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ type->u.function->param_types[j] = &sm6->types[type_id]; ++ } ++ break; ++ ++ case TYPE_CODE_HALF: ++ dxil_record_validate_operand_max_count(record, 0, sm6); ++ type->class = TYPE_CLASS_FLOAT; ++ type->u.width = 16; ++ break; ++ ++ case TYPE_CODE_INTEGER: ++ { ++ uint64_t width; ++ ++ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ type->class = TYPE_CLASS_INTEGER; ++ ++ switch ((width = record->operands[0])) ++ { ++ case 1: ++ case 8: ++ case 16: ++ case 32: ++ case 64: ++ break; ++ default: ++ WARN("Invalid integer width %"PRIu64" for type %zu.\n", width, type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ type->u.width = width; ++ break; ++ } ++ ++ case TYPE_CODE_LABEL: ++ type->class = TYPE_CLASS_LABEL; ++ break; ++ ++ case TYPE_CODE_METADATA: ++ type->class = TYPE_CLASS_METADATA; ++ break; ++ ++ case TYPE_CODE_NUMENTRY: ++ continue; ++ ++ case TYPE_CODE_POINTER: ++ if (!dxil_record_validate_operand_count(record, 1, 2, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ type->class = TYPE_CLASS_POINTER; ++ ++ if ((type_id = record->operands[0]) >= type_count) ++ { ++ WARN("Invalid pointee type id %"PRIu64" for type %zu.\n", type_id, type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ type->u.pointer.type = &sm6->types[type_id]; ++ type->u.pointer.addr_space = (record->operand_count > 1) ? 
record->operands[1] : ADDRESS_SPACE_DEFAULT; ++ break; ++ ++ case TYPE_CODE_STRUCT_ANON: ++ case TYPE_CODE_STRUCT_NAMED: ++ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ if (record->code == TYPE_CODE_STRUCT_NAMED && !struct_name) ++ { ++ WARN("Missing struct name before struct type %zu.\n", type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ type->class = TYPE_CLASS_STRUCT; ++ ++ count = record->operand_count - 1; ++ if (vkd3d_object_range_overflow(sizeof(type->u.struc), count, sizeof(type->u.struc->elem_types[0])) ++ || !(type->u.struc = vkd3d_malloc(offsetof(struct sm6_struct_info, elem_types[count])))) ++ { ++ ERR("Failed to allocate struct element types.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if (record->operands[0]) ++ FIXME("Ignoring struct packed attribute.\n"); ++ ++ type->u.struc->elem_count = count; ++ for (j = 0; j < count; ++j) ++ { ++ if ((type_id = record->operands[j + 1]) >= type_count) ++ { ++ WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); ++ vkd3d_free(type->u.struc); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ type->u.struc->elem_types[j] = &sm6->types[type_id]; ++ } ++ ++ if (record->code == TYPE_CODE_STRUCT_ANON) ++ { ++ type->u.struc->name = NULL; ++ break; ++ } ++ ++ type->u.struc->name = struct_name; ++ struct_name = NULL; ++ break; ++ ++ case TYPE_CODE_STRUCT_NAME: ++ if (!(struct_name = dxil_record_to_string(record, 0))) ++ { ++ ERR("Failed to allocate struct name.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ if (!struct_name[0]) ++ WARN("Struct name is empty for type %zu.\n", type_index); ++ continue; ++ ++ case TYPE_CODE_VOID: ++ dxil_record_validate_operand_max_count(record, 0, sm6); ++ type->class = TYPE_CLASS_VOID; ++ break; ++ ++ default: ++ FIXME("Unhandled type %u at index %zu.\n", record->code, type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++sm6->type_count; ++ } ++ ++ assert(sm6->type_count == 
type_count); ++ ++ if (struct_name) ++ { ++ WARN("Unused struct name %s.\n", struct_name); ++ vkd3d_free(struct_name); ++ } ++ ++ return VKD3D_OK; ++} ++ ++static inline bool sm6_type_is_void(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_VOID; ++} ++ ++static inline bool sm6_type_is_integer(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_INTEGER; ++} ++ ++static inline bool sm6_type_is_floating_point(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_FLOAT; ++} ++ ++static inline bool sm6_type_is_numeric(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_INTEGER || type->class == TYPE_CLASS_FLOAT; ++} ++ ++static inline bool sm6_type_is_pointer(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_POINTER; ++} ++ ++static bool sm6_type_is_numeric_aggregate(const struct sm6_type *type) ++{ ++ unsigned int i; ++ ++ switch (type->class) ++ { ++ case TYPE_CLASS_ARRAY: ++ case TYPE_CLASS_VECTOR: ++ return sm6_type_is_numeric(type->u.array.elem_type); ++ ++ case TYPE_CLASS_STRUCT: ++ /* Do not handle nested structs. Support can be added if they show up. 
*/ ++ for (i = 0; i < type->u.struc->elem_count; ++i) ++ if (!sm6_type_is_numeric(type->u.struc->elem_types[i])) ++ return false; ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++static inline bool sm6_type_is_struct(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_STRUCT; ++} ++ ++static inline bool sm6_type_is_function(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_FUNCTION; ++} ++ ++static inline bool sm6_type_is_function_pointer(const struct sm6_type *type) ++{ ++ return sm6_type_is_pointer(type) && sm6_type_is_function(type->u.pointer.type); ++} ++ ++static inline bool sm6_type_is_handle(const struct sm6_type *type) ++{ ++ return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.Handle"); ++} ++ ++static inline const struct sm6_type *sm6_type_get_element_type(const struct sm6_type *type) ++{ ++ return (type->class == TYPE_CLASS_ARRAY || type->class == TYPE_CLASS_VECTOR) ? type->u.array.elem_type : type; ++} ++ ++static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type *type, ++ enum bitcode_address_space addr_space, struct sm6_parser *sm6) ++{ ++ size_t i, start = type - sm6->types; ++ const struct sm6_type *pointer_type; ++ ++ /* DXC seems usually to place the pointer type immediately after its pointee. 
*/ ++ for (i = (start + 1) % sm6->type_count; i != start; i = (i + 1) % sm6->type_count) ++ { ++ pointer_type = &sm6->types[i]; ++ if (sm6_type_is_pointer(pointer_type) && pointer_type->u.pointer.type == type ++ && pointer_type->u.pointer.addr_space == addr_space) ++ return pointer_type; ++ } ++ ++ return NULL; ++} ++ ++static const struct sm6_type *sm6_parser_get_type(struct sm6_parser *sm6, uint64_t type_id) ++{ ++ if (type_id >= sm6->type_count) ++ { ++ WARN("Invalid type index %"PRIu64" at %zu.\n", type_id, sm6->value_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID, ++ "DXIL type id %"PRIu64" is invalid.", type_id); ++ return NULL; ++ } ++ return &sm6->types[type_id]; ++} ++ ++static int global_symbol_compare(const void *a, const void *b) ++{ ++ return vkd3d_u32_compare(((const struct sm6_symbol *)a)->id, ((const struct sm6_symbol *)b)->id); ++} ++ ++static enum vkd3d_result sm6_parser_symtab_init(struct sm6_parser *sm6) ++{ ++ const struct dxil_record *record; ++ const struct dxil_block *block; ++ struct sm6_symbol *symbol; ++ size_t i, count; ++ bool is_unique; ++ ++ sm6->p.location.line = 0; ++ sm6->p.location.column = 0; ++ ++ if (!(block = sm6_parser_get_level_one_block(sm6, VALUE_SYMTAB_BLOCK, &is_unique))) ++ { ++ /* There should always be at least one symbol: the name of the entry point function. 
*/ ++ WARN("No value symtab block found.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (!is_unique) ++ FIXME("Ignoring extra value symtab block(s).\n"); ++ ++ sm6->p.location.line = block->id; ++ ++ for (i = 0, count = 0; i < block->record_count; ++i) ++ count += block->records[i]->code == VST_CODE_ENTRY; ++ ++ if (!(sm6->global_symbols = vkd3d_calloc(count, sizeof(*sm6->global_symbols)))) ++ { ++ ERR("Failed to allocate global symbols.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ for (i = 0; i < block->record_count; ++i) ++ { ++ sm6->p.location.column = i; ++ record = block->records[i]; ++ ++ if (record->code != VST_CODE_ENTRY) ++ { ++ FIXME("Unhandled symtab code %u.\n", record->code); ++ continue; ++ } ++ if (!dxil_record_validate_operand_min_count(record, 1, sm6)) ++ continue; ++ ++ symbol = &sm6->global_symbols[sm6->global_symbol_count]; ++ symbol->id = record->operands[0]; ++ if (!(symbol->name = dxil_record_to_string(record, 1))) ++ { ++ ERR("Failed to allocate symbol name.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++sm6->global_symbol_count; ++ } ++ ++ sm6->p.location.column = block->record_count; ++ ++ qsort(sm6->global_symbols, sm6->global_symbol_count, sizeof(*sm6->global_symbols), global_symbol_compare); ++ for (i = 1; i < sm6->global_symbol_count; ++i) ++ { ++ if (sm6->global_symbols[i].id == sm6->global_symbols[i - 1].id) ++ { ++ WARN("Invalid duplicate symbol id %u.\n", sm6->global_symbols[i].id); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ ++static const char *sm6_parser_get_global_symbol_name(const struct sm6_parser *sm6, size_t id) ++{ ++ size_t i, start; ++ ++ /* id == array index is normally true */ ++ i = start = id % sm6->global_symbol_count; ++ do ++ { ++ if (sm6->global_symbols[i].id == id) ++ return sm6->global_symbols[i].name; ++ i = (i + 1) % sm6->global_symbol_count; ++ } while (i != start); ++ ++ return NULL; ++} ++ ++static inline bool sm6_value_is_dx_intrinsic_dcl(const struct 
sm6_value *fn) ++{ ++ assert(fn->value_type == VALUE_TYPE_FUNCTION); ++ return fn->u.function.is_prototype && !strncmp(fn->u.function.name, "dx.op.", 6); ++} ++ ++static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_parser *sm6) ++{ ++ assert(sm6->value_count < sm6->value_capacity); ++ return &sm6->values[sm6->value_count]; ++} ++ ++static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type *type) ++{ ++ if (type->class == TYPE_CLASS_INTEGER) ++ { ++ switch (type->u.width) ++ { ++ case 8: ++ return VKD3D_DATA_UINT8; ++ case 32: ++ return VKD3D_DATA_UINT; ++ default: ++ FIXME("Unhandled width %u.\n", type->u.width); ++ return VKD3D_DATA_UINT; ++ } ++ } ++ else if (type->class == TYPE_CLASS_FLOAT) ++ { ++ switch (type->u.width) ++ { ++ case 32: ++ return VKD3D_DATA_FLOAT; ++ case 64: ++ return VKD3D_DATA_DOUBLE; ++ default: ++ FIXME("Unhandled width %u.\n", type->u.width); ++ return VKD3D_DATA_FLOAT; ++ } ++ } ++ ++ FIXME("Unhandled type %u.\n", type->class); ++ return VKD3D_DATA_UINT; ++} ++ ++/* Recurse through the block tree while maintaining a current value count. The current ++ * count is the sum of the global count plus all declarations within the current function. ++ * Store into value_capacity the highest count seen. */ ++static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, ++ const struct dxil_block *block, size_t value_count) ++{ ++ size_t i, old_value_count = value_count; ++ ++ if (block->id == MODULE_BLOCK) ++ value_count = size_add_with_overflow_check(value_count, dxil_block_compute_module_decl_count(block)); ++ ++ for (i = 0; i < block->child_block_count; ++i) ++ value_count = sm6_parser_compute_max_value_count(sm6, block->child_blocks[i], value_count); ++ ++ switch (block->id) ++ { ++ case CONSTANTS_BLOCK: ++ /* Function local constants are contained in a child block of the function block. 
*/ ++ value_count = size_add_with_overflow_check(value_count, dxil_block_compute_constants_count(block)); ++ break; ++ case FUNCTION_BLOCK: ++ /* A function must start with a block count, which emits no value. This formula is likely to ++ * overestimate the value count somewhat, but this should be no problem. */ ++ value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); ++ sm6->value_capacity = max(sm6->value_capacity, value_count); ++ /* The value count returns to its previous value after handling a function. */ ++ if (value_count < SIZE_MAX) ++ value_count = old_value_count; ++ break; ++ default: ++ break; ++ } ++ ++ return value_count; ++} ++ ++static bool sm6_parser_declare_function(struct sm6_parser *sm6, const struct dxil_record *record) ++{ ++ const unsigned int max_count = 15; ++ const struct sm6_type *ret_type; ++ struct sm6_value *fn; ++ unsigned int i, j; ++ ++ if (!dxil_record_validate_operand_count(record, 8, max_count, sm6)) ++ return false; ++ ++ fn = sm6_parser_get_current_value(sm6); ++ fn->value_type = VALUE_TYPE_FUNCTION; ++ if (!(fn->u.function.name = sm6_parser_get_global_symbol_name(sm6, sm6->value_count))) ++ { ++ WARN("Missing symbol name for function %zu.\n", sm6->value_count); ++ fn->u.function.name = ""; ++ } ++ ++ if (!(fn->type = sm6_parser_get_type(sm6, record->operands[0]))) ++ return false; ++ if (!sm6_type_is_function(fn->type)) ++ { ++ WARN("Type is not a function.\n"); ++ return false; ++ } ++ ret_type = fn->type->u.function->ret_type; ++ ++ if (!(fn->type = sm6_type_get_pointer_to_type(fn->type, ADDRESS_SPACE_DEFAULT, sm6))) ++ { ++ WARN("Failed to get pointer type for type %u.\n", fn->type->class); ++ return false; ++ } ++ ++ if (record->operands[1]) ++ WARN("Ignoring calling convention %#"PRIx64".\n", record->operands[1]); ++ ++ fn->u.function.is_prototype = !!record->operands[2]; ++ ++ if (record->operands[3]) ++ WARN("Ignoring linkage %#"PRIx64".\n", record->operands[3]); ++ ++ if 
(record->operands[4] > UINT_MAX) ++ WARN("Invalid attributes id %#"PRIx64".\n", record->operands[4]); ++ /* 1-based index. */ ++ if ((fn->u.function.attribs_id = record->operands[4])) ++ TRACE("Ignoring function attributes.\n"); ++ ++ /* These always seem to be zero. */ ++ for (i = 5, j = 0; i < min(record->operand_count, max_count); ++i) ++ j += !!record->operands[i]; ++ if (j) ++ WARN("Ignoring %u operands.\n", j); ++ ++ if (sm6_value_is_dx_intrinsic_dcl(fn) && !sm6_type_is_void(ret_type) && !sm6_type_is_numeric(ret_type) ++ && !sm6_type_is_numeric_aggregate(ret_type) && !sm6_type_is_handle(ret_type)) ++ { ++ WARN("Unexpected return type for dx intrinsic function '%s'.\n", fn->u.function.name); ++ } ++ ++ ++sm6->value_count; ++ ++ return true; ++} ++ ++static inline uint64_t decode_rotated_signed_value(uint64_t value) ++{ ++ if (value != 1) ++ { ++ bool neg = value & 1; ++ value >>= 1; ++ return neg ? -value : value; ++ } ++ return value << 63; ++} ++ ++static inline float bitcast_uint64_to_float(uint64_t value) ++{ ++ union ++ { ++ uint32_t uint32_value; ++ float float_value; ++ } u; ++ ++ u.uint32_value = value; ++ return u.float_value; ++} ++ ++static inline double bitcast_uint64_to_double(uint64_t value) ++{ ++ union ++ { ++ uint64_t uint64_value; ++ double double_value; ++ } u; ++ ++ u.uint64_value = value; ++ return u.double_value; ++} ++ ++static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) ++{ ++ enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; ++ const struct sm6_type *type, *elem_type; ++ enum vkd3d_data_type reg_data_type; ++ const struct dxil_record *record; ++ struct sm6_value *dst; ++ size_t i, value_idx; ++ uint64_t value; ++ ++ for (i = 0, type = NULL; i < block->record_count; ++i) ++ { ++ sm6->p.location.column = i; ++ record = block->records[i]; ++ value_idx = sm6->value_count; ++ ++ if (record->code == CST_CODE_SETTYPE) ++ { ++ if (!dxil_record_validate_operand_count(record, 1, 
1, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if (!(type = sm6_parser_get_type(sm6, record->operands[0]))) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ elem_type = sm6_type_get_element_type(type); ++ if (sm6_type_is_numeric(elem_type)) ++ { ++ reg_data_type = vkd3d_data_type_from_sm6_type(elem_type); ++ reg_type = elem_type->u.width > 32 ? VKD3DSPR_IMMCONST64 : VKD3DSPR_IMMCONST; ++ } ++ else ++ { ++ reg_data_type = VKD3D_DATA_UNUSED; ++ reg_type = VKD3DSPR_INVALID; ++ } ++ ++ if (i == block->record_count - 1) ++ WARN("Unused SETTYPE record.\n"); ++ ++ continue; ++ } ++ ++ if (!type) ++ { ++ WARN("Constant record %zu has no type.\n", value_idx); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ dst = sm6_parser_get_current_value(sm6); ++ dst->type = type; ++ dst->value_type = VALUE_TYPE_REG; ++ dst->u.reg.type = reg_type; ++ dst->u.reg.immconst_type = VKD3D_IMMCONST_SCALAR; ++ dst->u.reg.data_type = reg_data_type; ++ ++ switch (record->code) ++ { ++ case CST_CODE_NULL: ++ /* Register constant data is already zero-filled. 
*/ ++ break; ++ ++ case CST_CODE_INTEGER: ++ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if (!sm6_type_is_integer(type)) ++ { ++ WARN("Invalid integer of non-integer type %u at constant idx %zu.\n", type->class, value_idx); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ value = decode_rotated_signed_value(record->operands[0]); ++ if (type->u.width <= 32) ++ dst->u.reg.u.immconst_uint[0] = value & ((1ull << type->u.width) - 1); ++ else ++ dst->u.reg.u.immconst_uint64[0] = value; ++ ++ break; ++ ++ case CST_CODE_FLOAT: ++ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if (!sm6_type_is_floating_point(type)) ++ { ++ WARN("Invalid float of non-fp type %u at constant idx %zu.\n", type->class, value_idx); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (type->u.width == 16) ++ FIXME("Half float type is not supported yet.\n"); ++ else if (type->u.width == 32) ++ dst->u.reg.u.immconst_float[0] = bitcast_uint64_to_float(record->operands[0]); ++ else if (type->u.width == 64) ++ dst->u.reg.u.immconst_double[0] = bitcast_uint64_to_double(record->operands[0]); ++ else ++ vkd3d_unreachable(); ++ ++ break; ++ ++ case CST_CODE_DATA: ++ WARN("Unhandled constant array.\n"); ++ break; ++ ++ case CST_CODE_UNDEF: ++ dxil_record_validate_operand_max_count(record, 0, sm6); ++ dst->u.reg.type = VKD3DSPR_UNDEF; ++ /* Mark as explicitly undefined, not the result of a missing constant code or instruction. 
*/ ++ dst->is_undefined = true; ++ break; ++ ++ default: ++ FIXME("Unhandled constant code %u.\n", record->code); ++ dst->u.reg.type = VKD3DSPR_UNDEF; ++ break; ++ } ++ ++ ++sm6->value_count; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static struct vkd3d_shader_instruction *sm6_parser_require_space(struct sm6_parser *sm6, size_t extra) ++{ ++ if (!shader_instruction_array_reserve(&sm6->p.instructions, sm6->p.instructions.count + extra)) ++ { ++ ERR("Failed to allocate instruction.\n"); ++ return NULL; ++ } ++ return &sm6->p.instructions.elements[sm6->p.instructions.count]; ++} ++ ++/* Space should be reserved before calling this. It is intended to require no checking of the returned pointer. */ ++static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_parser *sm6, ++ enum vkd3d_shader_opcode handler_idx) ++{ ++ struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); ++ assert(ins); ++ shader_instruction_init(ins, handler_idx); ++ ++sm6->p.instructions.count; ++ return ins; ++} ++ ++static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) ++{ ++ const struct dxil_block *block = &sm6->root_block; ++ const struct dxil_record *record; ++ uint64_t version; ++ size_t i; ++ ++ sm6->p.location.line = block->id; ++ sm6->p.location.column = 0; ++ ++ for (i = 0; i < block->record_count; ++i) ++ { ++ sm6->p.location.column = i; ++ record = block->records[i]; ++ switch (record->code) ++ { ++ case MODULE_CODE_FUNCTION: ++ if (!sm6_parser_declare_function(sm6, record)) ++ { ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL, ++ "A DXIL function declaration is invalid."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ break; ++ ++ case MODULE_CODE_GLOBALVAR: ++ FIXME("Global variables are not implemented yet.\n"); ++ break; ++ ++ case MODULE_CODE_VERSION: ++ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ if ((version = record->operands[0]) != 1) ++ 
{ ++ FIXME("Unsupported format version %#"PRIx64".\n", version); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT, ++ "Bitcode format version %#"PRIx64" is unsupported.", version); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ break; ++ ++ default: ++ break; ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ ++static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) ++{ ++ size_t i, count = sm6->function_count; ++ ++ for (i = 0; i < sm6->value_count; ++i) ++ { ++ if (sm6_type_is_function_pointer(sm6->values[i].type) && !sm6->values[i].u.function.is_prototype && !count--) ++ break; ++ } ++ if (i == sm6->value_count) ++ return NULL; ++ ++ ++sm6->function_count; ++ return &sm6->values[i]; ++} ++ ++static struct sm6_block *sm6_block_create() ++{ ++ struct sm6_block *block = vkd3d_calloc(1, sizeof(*block)); ++ return block; ++} ++ ++static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record *record, ++ struct sm6_block *code_block, struct vkd3d_shader_instruction *ins) ++{ ++ if (!dxil_record_validate_operand_count(record, 0, 1, sm6)) ++ return; ++ ++ if (record->operand_count) ++ FIXME("Non-void return is not implemented.\n"); ++ ++ ins->handler_idx = VKD3DSIH_NOP; ++} ++ ++static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct dxil_block *block, ++ struct sm6_function *function) ++{ ++ struct vkd3d_shader_instruction *ins; ++ const struct dxil_record *record; ++ struct sm6_block *code_block; ++ struct sm6_value *dst; ++ size_t i, block_idx; ++ bool ret_found; ++ enum ++ { ++ RESULT_VALUE, ++ RESULT_TERMINATE, ++ } result_type; ++ ++ if (sm6->function_count) ++ { ++ FIXME("Multiple functions are not supported yet.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (!(function->declaration = sm6_parser_next_function_definition(sm6))) ++ { ++ WARN("Failed to find definition to match function body.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if 
(block->record_count < 2) ++ { ++ /* It should contain at least a block count and a RET instruction. */ ++ WARN("Invalid function block record count %zu.\n", block->record_count); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (block->records[0]->code != FUNC_CODE_DECLAREBLOCKS || !block->records[0]->operand_count ++ || block->records[0]->operands[0] > UINT_MAX) ++ { ++ WARN("Block count declaration not found or invalid.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!(function->block_count = block->records[0]->operands[0])) ++ { ++ WARN("Function contains no blocks.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (function->block_count > 1) ++ { ++ FIXME("Branched shaders are not supported yet.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!(function->blocks[0] = sm6_block_create())) ++ { ++ ERR("Failed to allocate code block.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ code_block = function->blocks[0]; ++ ++ for (i = 1, block_idx = 0, ret_found = false; i < block->record_count; ++i) ++ { ++ sm6->p.location.column = i; ++ ++ /* block->record_count - 1 is the instruction count, but some instructions ++ * can emit >1 IR instruction, so extra may be used. 
*/ ++ if (!vkd3d_array_reserve((void **)&code_block->instructions, &code_block->instruction_capacity, ++ max(code_block->instruction_count + 1, block->record_count), sizeof(*code_block->instructions))) ++ { ++ ERR("Failed to allocate instructions.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ ins = &code_block->instructions[code_block->instruction_count]; ++ ins->handler_idx = VKD3DSIH_INVALID; ++ ++ dst = sm6_parser_get_current_value(sm6); ++ dst->type = NULL; ++ dst->value_type = VALUE_TYPE_REG; ++ result_type = RESULT_VALUE; ++ ++ record = block->records[i]; ++ switch (record->code) ++ { ++ case FUNC_CODE_INST_RET: ++ sm6_parser_emit_ret(sm6, record, code_block, ins); ++ result_type = RESULT_TERMINATE; ++ ret_found = true; ++ break; ++ default: ++ FIXME("Unhandled dxil instruction %u.\n", record->code); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (result_type == RESULT_TERMINATE) ++ { ++ ++block_idx; ++ code_block = (block_idx < function->block_count) ? function->blocks[block_idx] : NULL; ++ } ++ if (code_block) ++ code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; ++ else ++ assert(ins->handler_idx == VKD3DSIH_NOP); ++ sm6->value_count += !!dst->type; ++ } ++ ++ if (!ret_found) ++ { ++ WARN("Function contains no RET instruction.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static bool sm6_block_emit_instructions(struct sm6_block *block, struct sm6_parser *sm6) ++{ ++ struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, block->instruction_count + 1); ++ ++ if (!ins) ++ return false; ++ ++ memcpy(ins, block->instructions, block->instruction_count * sizeof(*block->instructions)); ++ sm6->p.instructions.count += block->instruction_count; ++ ++ sm6_parser_add_instruction(sm6, VKD3DSIH_RET); ++ ++ return true; ++} ++ ++static enum vkd3d_result sm6_parser_module_init(struct sm6_parser *sm6, const struct dxil_block *block, ++ unsigned int level) ++{ ++ size_t i, old_value_count = 
sm6->value_count; ++ struct sm6_function *function; ++ enum vkd3d_result ret; ++ ++ for (i = 0; i < block->child_block_count; ++i) ++ { ++ if ((ret = sm6_parser_module_init(sm6, block->child_blocks[i], level + 1)) < 0) ++ return ret; ++ } ++ ++ sm6->p.location.line = block->id; ++ sm6->p.location.column = 0; ++ ++ switch (block->id) ++ { ++ case CONSTANTS_BLOCK: ++ return sm6_parser_constants_init(sm6, block); ++ ++ case FUNCTION_BLOCK: ++ function = &sm6->functions[sm6->function_count]; ++ if ((ret = sm6_parser_function_init(sm6, block, function)) < 0) ++ return ret; ++ /* The value index returns to its previous value after handling a function. It's usually nonzero ++ * at the start because of global constants/variables/function declarations. Function constants ++ * occur in a child block, so value_count is already saved before they are emitted. */ ++ memset(&sm6->values[old_value_count], 0, (sm6->value_count - old_value_count) * sizeof(*sm6->values)); ++ sm6->value_count = old_value_count; ++ break; ++ ++ case BLOCKINFO_BLOCK: ++ case MODULE_BLOCK: ++ case PARAMATTR_BLOCK: ++ case PARAMATTR_GROUP_BLOCK: ++ case VALUE_SYMTAB_BLOCK: ++ case METADATA_BLOCK: ++ case METADATA_ATTACHMENT_BLOCK: ++ case TYPE_BLOCK: ++ break; ++ ++ default: ++ FIXME("Unhandled block id %u.\n", block->id); ++ break; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static void sm6_type_table_cleanup(struct sm6_type *types, size_t count) ++{ ++ size_t i; ++ ++ if (!types) ++ return; ++ ++ for (i = 0; i < count; ++i) ++ { ++ switch (types[i].class) ++ { ++ case TYPE_CLASS_STRUCT: ++ vkd3d_free((void *)types[i].u.struc->name); ++ vkd3d_free(types[i].u.struc); ++ break; ++ case TYPE_CLASS_FUNCTION: ++ vkd3d_free(types[i].u.function); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ vkd3d_free(types); ++} ++ ++static void sm6_symtab_cleanup(struct sm6_symbol *symbols, size_t count) ++{ ++ size_t i; ++ ++ for (i = 0; i < count; ++i) ++ vkd3d_free((void *)symbols[i].name); ++ vkd3d_free(symbols); ++} ++ 
++static void sm6_block_destroy(struct sm6_block *block) ++{ ++ vkd3d_free(block->instructions); ++ vkd3d_free(block); ++} ++ ++static void sm6_functions_cleanup(struct sm6_function *functions, size_t count) ++{ ++ size_t i, j; ++ ++ for (i = 0; i < count; ++i) ++ { ++ for (j = 0; j < functions[i].block_count; ++j) ++ sm6_block_destroy(functions[i].blocks[j]); ++ } ++ vkd3d_free(functions); ++} ++ ++static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) ++{ ++ struct sm6_parser *sm6 = sm6_parser(parser); ++ ++ dxil_block_destroy(&sm6->root_block); ++ dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); ++ shader_instruction_array_destroy(&parser->instructions); ++ sm6_type_table_cleanup(sm6->types, sm6->type_count); ++ sm6_symtab_cleanup(sm6->global_symbols, sm6->global_symbol_count); ++ sm6_functions_cleanup(sm6->functions, sm6->function_count); ++ vkd3d_free(sm6->values); ++ free_shader_desc(&parser->shader_desc); ++ vkd3d_free(sm6); ++} ++ ++static const struct vkd3d_shader_parser_ops sm6_parser_ops = ++{ ++ .parser_destroy = sm6_parser_destroy, ++}; ++ ++static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, ++ const char *source_name, struct vkd3d_shader_message_context *message_context) ++{ ++ const struct vkd3d_shader_location location = {.source_name = source_name}; ++ uint32_t version_token, dxil_version, token_count, magic; ++ unsigned int chunk_offset, chunk_size; ++ size_t count, length, function_count; ++ enum bitcode_block_abbreviation abbr; ++ struct vkd3d_shader_version version; ++ struct dxil_block *block; ++ enum vkd3d_result ret; ++ unsigned int i; ++ ++ count = byte_code_size / sizeof(*byte_code); ++ if (count < 6) ++ { ++ WARN("Invalid data size %zu.\n", byte_code_size); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE, ++ "DXIL chunk size %zu is smaller than the DXIL header size.", byte_code_size); ++ return 
VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ version_token = byte_code[0]; ++ TRACE("Compiler version: 0x%08x.\n", version_token); ++ token_count = byte_code[1]; ++ TRACE("Token count: %u.\n", token_count); ++ ++ if (token_count < 6 || count < token_count) ++ { ++ WARN("Invalid token count %u (word count %zu).\n", token_count, count); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, ++ "DXIL chunk token count %#x is invalid (word count %zu).", token_count, count); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (byte_code[2] != TAG_DXIL) ++ WARN("Unknown magic number 0x%08x.\n", byte_code[2]); ++ ++ dxil_version = byte_code[3]; ++ if (dxil_version > 0x102) ++ WARN("Unknown DXIL version: 0x%08x.\n", dxil_version); ++ else ++ TRACE("DXIL version: 0x%08x.\n", dxil_version); ++ ++ chunk_offset = byte_code[4]; ++ if (chunk_offset < 16 || chunk_offset >= byte_code_size) ++ { ++ WARN("Invalid bitcode chunk offset %#x (data size %zu).\n", chunk_offset, byte_code_size); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET, ++ "DXIL bitcode chunk has invalid offset %#x (data size %#zx).", chunk_offset, byte_code_size); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ chunk_size = byte_code[5]; ++ if (chunk_size > byte_code_size - chunk_offset) ++ { ++ WARN("Invalid bitcode chunk size %#x (data size %zu, chunk offset %#x).\n", ++ chunk_size, byte_code_size, chunk_offset); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, ++ "DXIL bitcode chunk has invalid size %#x (data size %#zx, chunk offset %#x).", ++ chunk_size, byte_code_size, chunk_offset); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ sm6->start = (const uint32_t *)((const char*)&byte_code[2] + chunk_offset); ++ if ((magic = sm6->start[0]) != BITCODE_MAGIC) ++ { ++ WARN("Unknown magic number 0x%08x.\n", magic); ++ vkd3d_shader_parser_warning(&sm6->p, 
VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER, ++ "DXIL bitcode chunk magic number 0x%08x is not the expected 0x%08x.", magic, BITCODE_MAGIC); ++ } ++ ++ sm6->end = &sm6->start[(chunk_size + sizeof(*sm6->start) - 1) / sizeof(*sm6->start)]; ++ ++ if ((version.type = version_token >> 16) >= VKD3D_SHADER_TYPE_COUNT) ++ { ++ FIXME("Unknown shader type %#x.\n", version.type); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE, ++ "Unknown shader type %#x.", version.type); ++ } ++ ++ version.major = VKD3D_SM6_VERSION_MAJOR(version_token); ++ version.minor = VKD3D_SM6_VERSION_MINOR(version_token); ++ ++ if ((abbr = sm6->start[1] & 3) != ENTER_SUBBLOCK) ++ { ++ WARN("Initial block abbreviation %u is not ENTER_SUBBLOCK.\n", abbr); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, ++ "DXIL bitcode chunk has invalid initial block abbreviation %u.", abbr); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ /* Estimate instruction count to avoid reallocation in most shaders. 
*/ ++ count = max(token_count, 400) - 400; ++ vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &sm6_parser_ops, ++ (count + (count >> 2)) / 2u + 10); ++ sm6->ptr = &sm6->start[1]; ++ sm6->bitpos = 2; ++ ++ block = &sm6->root_block; ++ if ((ret = dxil_block_init(block, NULL, sm6)) < 0) ++ { ++ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory parsing DXIL bitcode chunk."); ++ else if (ret == VKD3D_ERROR_INVALID_SHADER) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, ++ "DXIL bitcode chunk has invalid bitcode."); ++ else ++ vkd3d_unreachable(); ++ return ret; ++ } ++ ++ dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); ++ sm6->abbrevs = NULL; ++ sm6->abbrev_count = 0; ++ ++ length = sm6->ptr - sm6->start - block->start; ++ if (length != block->length) ++ { ++ WARN("Invalid block length %zu; expected %u.\n", length, block->length); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH, ++ "Root block ends with length %zu but indicated length is %u.", length, block->length); ++ } ++ if (sm6->ptr != sm6->end) ++ { ++ size_t expected_length = sm6->end - sm6->start; ++ length = sm6->ptr - sm6->start; ++ WARN("Invalid module length %zu; expected %zu.\n", length, expected_length); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH, ++ "Module ends with length %zu but indicated length is %zu.", length, expected_length); ++ } ++ ++ if ((ret = sm6_parser_type_table_init(sm6)) < 0) ++ { ++ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory parsing DXIL type table."); ++ else if (ret == VKD3D_ERROR_INVALID_SHADER) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE, ++ "DXIL type table is invalid."); ++ 
else ++ vkd3d_unreachable(); ++ return ret; ++ } ++ ++ if ((ret = sm6_parser_symtab_init(sm6)) < 0) ++ { ++ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory parsing DXIL value symbol table."); ++ else if (ret == VKD3D_ERROR_INVALID_SHADER) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB, ++ "DXIL value symbol table is invalid."); ++ else ++ vkd3d_unreachable(); ++ return ret; ++ } ++ ++ function_count = dxil_block_compute_function_count(&sm6->root_block); ++ if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) ++ { ++ ERR("Failed to allocate function array.\n"); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory allocating DXIL function array."); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if (sm6_parser_compute_max_value_count(sm6, &sm6->root_block, 0) == SIZE_MAX) ++ { ++ WARN("Value array count overflowed.\n"); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, ++ "Overflow occurred in the DXIL module value count."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values)))) ++ { ++ ERR("Failed to allocate value array.\n"); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory allocating DXIL value array."); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if ((ret = sm6_parser_globals_init(sm6)) < 0) ++ { ++ WARN("Failed to load global declarations.\n"); ++ return ret; ++ } ++ ++ if ((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) ++ { ++ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory parsing DXIL module."); ++ else if (ret == VKD3D_ERROR_INVALID_SHADER) ++ 
vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, ++ "DXIL module is invalid."); ++ else ++ vkd3d_unreachable(); ++ return ret; ++ } ++ ++ for (i = 0; i < sm6->function_count; ++i) ++ { ++ if (!sm6_block_emit_instructions(sm6->functions[i].blocks[0], sm6)) ++ { ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory emitting shader instructions."); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ } ++ ++ dxil_block_destroy(&sm6->root_block); ++ ++ return VKD3D_OK; ++} ++ ++int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) ++{ ++ struct vkd3d_shader_desc *shader_desc; ++ uint32_t *byte_code = NULL; ++ struct sm6_parser *sm6; ++ int ret; ++ ++ if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6)))) ++ { ++ ERR("Failed to allocate parser.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ shader_desc = &sm6->p.shader_desc; ++ shader_desc->is_dxil = true; ++ if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, ++ shader_desc)) < 0) ++ { ++ WARN("Failed to extract shader, vkd3d result %d.\n", ret); ++ vkd3d_free(sm6); ++ return ret; ++ } ++ ++ sm6->p.shader_desc = *shader_desc; ++ shader_desc = &sm6->p.shader_desc; ++ ++ if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) ++ { ++ /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC ++ * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ ++ if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) ++ ERR("Failed to allocate aligned chunk. 
Unaligned access will occur.\n"); ++ else ++ memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); ++ } ++ ++ ret = sm6_parser_init(sm6, byte_code ? byte_code : shader_desc->byte_code, shader_desc->byte_code_size, ++ compile_info->source_name, message_context); ++ vkd3d_free(byte_code); ++ ++ if (ret < 0) ++ { ++ WARN("Failed to initialise shader parser.\n"); ++ sm6_parser_destroy(&sm6->p); ++ return ret; ++ } ++ ++ *parser = &sm6->p; ++ ++ return ret; ++} +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index ba5bcfbfaf0..8b706e1e667 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -430,6 +430,51 @@ struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl + return type; + } + ++unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, ++ enum hlsl_regset regset, unsigned int index) ++{ ++ struct hlsl_type *next_type; ++ unsigned int offset = 0; ++ unsigned int idx; ++ ++ while (!type_is_single_component(type)) ++ { ++ next_type = type; ++ idx = traverse_path_from_component_index(ctx, &next_type, &index); ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ if (regset == HLSL_REGSET_NUMERIC) ++ offset += idx; ++ break; ++ ++ case HLSL_CLASS_STRUCT: ++ offset += type->e.record.fields[idx].reg_offset[regset]; ++ break; ++ ++ case HLSL_CLASS_ARRAY: ++ if (regset == HLSL_REGSET_NUMERIC) ++ offset += idx * align(type->e.array.type->reg_size[regset], 4); ++ else ++ offset += idx * type->e.array.type->reg_size[regset]; ++ break; ++ ++ case HLSL_CLASS_OBJECT: ++ assert(idx == 0); ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ type = next_type; ++ } ++ ++ return offset; ++} ++ + static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, + unsigned int path_len) + { +@@ -524,7 +569,9 @@ struct hlsl_type 
*hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de + unsigned int i; + + assert(deref); +- assert(!deref->offset.node); ++ ++ if (deref->offset.node) ++ return deref->data_type; + + type = deref->var->data_type; + for (i = 0; i < deref->path_len; ++i) +@@ -626,6 +673,7 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba + type->e.array.type = basic_type; + type->dimx = basic_type->dimx; + type->dimy = basic_type->dimy; ++ type->sampler_dim = basic_type->sampler_dim; + hlsl_type_calculate_reg_size(ctx, type); + + list_add_tail(&ctx->types, &type->entry); +@@ -992,20 +1040,31 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem + struct vkd3d_string_buffer *string; + struct hlsl_ir_var *var; + static LONG counter; +- const char *name; + + if (!(string = hlsl_get_string_buffer(ctx))) + return NULL; + vkd3d_string_buffer_printf(string, "<%s-%u>", template, InterlockedIncrement(&counter)); +- if (!(name = hlsl_strdup(ctx, string->buffer))) +- { +- hlsl_release_string_buffer(ctx, string); +- return NULL; +- } +- var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); ++ var = hlsl_new_synthetic_var_named(ctx, string->buffer, type, loc, true); + hlsl_release_string_buffer(ctx, string); ++ return var; ++} ++ ++struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, ++ struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope) ++{ ++ struct hlsl_ir_var *var; ++ const char *name_copy; ++ ++ if (!(name_copy = hlsl_strdup(ctx, name))) ++ return NULL; ++ var = hlsl_new_var(ctx, name_copy, type, loc, NULL, 0, NULL); + if (var) +- list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); ++ { ++ if (dummy_scope) ++ list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); ++ else ++ list_add_tail(&ctx->globals->vars, &var->scope_entry); ++ } + return var; + } + +@@ -1432,7 +1491,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, 
struct hlsl_ir_node *v + } + + struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, +- const struct vkd3d_shader_location *loc) ++ struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_jump *jump; + +@@ -1440,6 +1499,7 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type + return NULL; + init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); + jump->type = type; ++ hlsl_src_from_node(&jump->condition, condition); + return &jump->node; + } + +@@ -1484,7 +1544,7 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, + hlsl_block_cleanup(dst_block); + return false; + } +- list_add_tail(&dst_block->instrs, &dst->entry); ++ hlsl_block_add_instr(dst_block, dst); + + if (!list_empty(&src->uses)) + { +@@ -1585,9 +1645,9 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma + return dst; + } + +-static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) ++static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_jump *src) + { +- return hlsl_new_jump(ctx, src->type, &src->node.loc); ++ return hlsl_new_jump(ctx, src->type, map_instr(map, src->condition.node), &src->node.loc); + } + + static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) +@@ -1728,7 +1788,7 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + return clone_index(ctx, map, hlsl_ir_index(instr)); + + case HLSL_IR_JUMP: +- return clone_jump(ctx, hlsl_ir_jump(instr)); ++ return clone_jump(ctx, map, hlsl_ir_jump(instr)); + + case HLSL_IR_LOAD: + return clone_load(ctx, map, hlsl_ir_load(instr)); +@@ -2065,6 +2125,31 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + } + } + ++struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, ++ 
unsigned int index) ++{ ++ struct hlsl_type *type = var->data_type, *current_type; ++ struct vkd3d_string_buffer *buffer; ++ unsigned int element_index; ++ ++ if (!(buffer = hlsl_get_string_buffer(ctx))) ++ return NULL; ++ ++ vkd3d_string_buffer_printf(buffer, "%s", var->name); ++ ++ while (!type_is_single_component(type)) ++ { ++ current_type = type; ++ element_index = traverse_path_from_component_index(ctx, &type, &index); ++ if (current_type->class == HLSL_CLASS_STRUCT) ++ vkd3d_string_buffer_printf(buffer, ".%s", current_type->e.record.fields[element_index].name); ++ else ++ vkd3d_string_buffer_printf(buffer, "[%u]", element_index); ++ } ++ ++ return buffer; ++} ++ + const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) + { + struct vkd3d_string_buffer *string; +@@ -2123,18 +2208,18 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) + { + static const char * const names[] = + { +- "HLSL_IR_CALL", +- "HLSL_IR_CONSTANT", +- "HLSL_IR_EXPR", +- "HLSL_IR_IF", +- "HLSL_IR_INDEX", +- "HLSL_IR_LOAD", +- "HLSL_IR_LOOP", +- "HLSL_IR_JUMP", +- "HLSL_IR_RESOURCE_LOAD", +- "HLSL_IR_RESOURCE_STORE", +- "HLSL_IR_STORE", +- "HLSL_IR_SWIZZLE", ++ [HLSL_IR_CALL ] = "HLSL_IR_CALL", ++ [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", ++ [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", ++ [HLSL_IR_IF ] = "HLSL_IR_IF", ++ [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", ++ [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", ++ [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", ++ [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", ++ [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", ++ [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", ++ [HLSL_IR_STORE ] = "HLSL_IR_STORE", ++ [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", + }; + + if (type >= ARRAY_SIZE(names)) +@@ -2146,10 +2231,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) + { + static const char * const names[] = + { +- "HLSL_IR_JUMP_BREAK", +- "HLSL_IR_JUMP_CONTINUE", +- "HLSL_IR_JUMP_DISCARD", +- "HLSL_IR_JUMP_RETURN", ++ [HLSL_IR_JUMP_BREAK] = 
"HLSL_IR_JUMP_BREAK", ++ [HLSL_IR_JUMP_CONTINUE] = "HLSL_IR_JUMP_CONTINUE", ++ [HLSL_IR_JUMP_DISCARD_NEG] = "HLSL_IR_JUMP_DISCARD_NEG", ++ [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", ++ [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", + }; + + assert(type < ARRAY_SIZE(names)); +@@ -2158,11 +2244,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) + + static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr); + +-static void dump_instr_list(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct list *list) ++static void dump_block(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_block *block) + { + struct hlsl_ir_node *instr; + +- LIST_FOR_EACH_ENTRY(instr, list, struct hlsl_ir_node, entry) ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + dump_instr(ctx, buffer, instr); + vkd3d_string_buffer_printf(buffer, "\n"); +@@ -2337,7 +2423,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP1_COS] = "cos", + [HLSL_OP1_COS_REDUCED] = "cos_reduced", + [HLSL_OP1_DSX] = "dsx", ++ [HLSL_OP1_DSX_COARSE] = "dsx_coarse", ++ [HLSL_OP1_DSX_FINE] = "dsx_fine", + [HLSL_OP1_DSY] = "dsy", ++ [HLSL_OP1_DSY_COARSE] = "dsy_coarse", ++ [HLSL_OP1_DSY_FINE] = "dsy_fine", + [HLSL_OP1_EXP2] = "exp2", + [HLSL_OP1_FRACT] = "fract", + [HLSL_OP1_LOG2] = "log2", +@@ -2400,9 +2490,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + vkd3d_string_buffer_printf(buffer, "if ("); + dump_src(buffer, &if_node->condition); + vkd3d_string_buffer_printf(buffer, ") {\n"); +- dump_instr_list(ctx, buffer, &if_node->then_block.instrs); ++ dump_block(ctx, buffer, &if_node->then_block); + vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); +- dump_instr_list(ctx, buffer, &if_node->else_block.instrs); ++ dump_block(ctx, buffer, &if_node->else_block); + vkd3d_string_buffer_printf(buffer, " %10s }", 
""); + } + +@@ -2418,8 +2508,12 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i + vkd3d_string_buffer_printf(buffer, "continue"); + break; + +- case HLSL_IR_JUMP_DISCARD: +- vkd3d_string_buffer_printf(buffer, "discard"); ++ case HLSL_IR_JUMP_DISCARD_NEG: ++ vkd3d_string_buffer_printf(buffer, "discard_neg"); ++ break; ++ ++ case HLSL_IR_JUMP_DISCARD_NZ: ++ vkd3d_string_buffer_printf(buffer, "discard_nz"); + break; + + case HLSL_IR_JUMP_RETURN: +@@ -2431,7 +2525,7 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i + static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) + { + vkd3d_string_buffer_printf(buffer, "for (;;) {\n"); +- dump_instr_list(ctx, buffer, &loop->body.instrs); ++ dump_block(ctx, buffer, &loop->body); + vkd3d_string_buffer_printf(buffer, " %10s }", ""); + } + +@@ -2450,6 +2544,8 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru + [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", + [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", + [HLSL_RESOURCE_GATHER_ALPHA] = "gather_alpha", ++ [HLSL_RESOURCE_SAMPLE_INFO] = "sample_info", ++ [HLSL_RESOURCE_RESINFO] = "resinfo", + }; + + assert(load->load_type < ARRAY_SIZE(type_names)); +@@ -2457,8 +2553,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru + dump_deref(buffer, &load->resource); + vkd3d_string_buffer_printf(buffer, ", sampler = "); + dump_deref(buffer, &load->sampler); +- vkd3d_string_buffer_printf(buffer, ", coords = "); +- dump_src(buffer, &load->coords); ++ if (load->coords.node) ++ { ++ vkd3d_string_buffer_printf(buffer, ", coords = "); ++ dump_src(buffer, &load->coords); ++ } + if (load->sample_index.node) + { + vkd3d_string_buffer_printf(buffer, ", sample index = "); +@@ -2614,7 +2713,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl + vkd3d_string_buffer_printf(&buffer, 
"\n"); + } + if (func->has_body) +- dump_instr_list(ctx, &buffer, &func->body.instrs); ++ dump_block(ctx, &buffer, &func->body); + + vkd3d_string_buffer_trace(&buffer); + vkd3d_string_buffer_cleanup(&buffer); +@@ -2703,6 +2802,7 @@ static void free_ir_if(struct hlsl_ir_if *if_node) + + static void free_ir_jump(struct hlsl_ir_jump *jump) + { ++ hlsl_src_remove(&jump->condition); + vkd3d_free(jump); + } + +@@ -2822,7 +2922,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) + + for (i = 0; i < attr->args_count; ++i) + hlsl_src_remove(&attr->args[i]); +- hlsl_free_instr_list(&attr->instrs); ++ hlsl_block_cleanup(&attr->instrs); + vkd3d_free((void *)attr->name); + vkd3d_free(attr); + } +@@ -3127,8 +3227,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + + for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) + { +- unsigned int n_variants = 0; + const char *const *variants; ++ unsigned int n_variants; + + switch (bt) + { +@@ -3148,6 +3248,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + break; + + default: ++ n_variants = 0; ++ variants = NULL; + break; + } + +@@ -3199,9 +3301,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + } + } + +-static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, ++static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, + const struct hlsl_profile_info *profile, struct vkd3d_shader_message_context *message_context) + { ++ unsigned int i; ++ + memset(ctx, 0, sizeof(*ctx)); + + ctx->profile = profile; +@@ -3210,7 +3314,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, + + if (!(ctx->source_files = hlsl_alloc(ctx, sizeof(*ctx->source_files)))) + return false; +- if (!(ctx->source_files[0] = hlsl_strdup(ctx, source_name ? source_name : ""))) ++ if (!(ctx->source_files[0] = hlsl_strdup(ctx, compile_info->source_name ? 
compile_info->source_name : ""))) + { + vkd3d_free(ctx->source_files); + return false; +@@ -3249,6 +3353,19 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, + return false; + ctx->cur_buffer = ctx->globals_buffer; + ++ for (i = 0; i < compile_info->option_count; ++i) ++ { ++ const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; ++ ++ if (option->name == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER) ++ { ++ if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) ++ ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; ++ else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) ++ ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; ++ } ++ } ++ + return true; + } + +@@ -3260,6 +3377,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) + struct hlsl_type *type, *next_type; + unsigned int i; + ++ hlsl_block_cleanup(&ctx->static_initializers); ++ + for (i = 0; i < ctx->source_files_count; ++i) + vkd3d_free((void *)ctx->source_files[i]); + vkd3d_free(ctx->source_files); +@@ -3283,6 +3402,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) + vkd3d_free((void *)buffer->name); + vkd3d_free(buffer); + } ++ ++ vkd3d_free(ctx->constant_defs.regs); + } + + int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, +@@ -3324,7 +3445,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d + return VKD3D_ERROR_INVALID_ARGUMENT; + } + +- if (!hlsl_ctx_init(&ctx, compile_info->source_name, profile, message_context)) ++ if (!hlsl_ctx_init(&ctx, compile_info, profile, message_context)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if ((ret = hlsl_lexer_compile(&ctx, hlsl)) == 2) +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index bce48e94b24..0a8d3a692a3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -257,7 +257,7 @@ struct 
hlsl_reg + /* Number of registers to be allocated. + * Unlike the variable's type's regsize, it is not expressed in register components, but rather + * in whole registers, and may depend on which components are used within the shader. */ +- uint32_t bind_count; ++ uint32_t allocation_size; + /* For numeric registers, a writemask can be provided to indicate the reservation of only some + * of the 4 components. */ + unsigned int writemask; +@@ -337,7 +337,7 @@ struct hlsl_src + struct hlsl_attribute + { + const char *name; +- struct list instrs; ++ struct hlsl_block instrs; + struct vkd3d_shader_location loc; + unsigned int args_count; + struct hlsl_src args[]; +@@ -417,11 +417,15 @@ struct hlsl_ir_var + enum hlsl_sampler_dim sampler_dim; + struct vkd3d_shader_location first_sampler_dim_loc; + } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; ++ /* Minimum number of binds required to include all object components actually used in the shader. ++ * It may be less than the allocation size, e.g. for texture arrays. */ ++ unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1]; + + uint32_t is_input_semantic : 1; + uint32_t is_output_semantic : 1; + uint32_t is_uniform : 1; + uint32_t is_param : 1; ++ uint32_t is_separated_resource : 1; + }; + + /* Sized array of variables representing a function's parameters. 
*/ +@@ -502,7 +506,11 @@ enum hlsl_ir_expr_op + HLSL_OP1_COS, + HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ + HLSL_OP1_DSX, ++ HLSL_OP1_DSX_COARSE, ++ HLSL_OP1_DSX_FINE, + HLSL_OP1_DSY, ++ HLSL_OP1_DSY_COARSE, ++ HLSL_OP1_DSY_FINE, + HLSL_OP1_EXP2, + HLSL_OP1_FLOOR, + HLSL_OP1_FRACT, +@@ -558,7 +566,8 @@ enum hlsl_ir_jump_type + { + HLSL_IR_JUMP_BREAK, + HLSL_IR_JUMP_CONTINUE, +- HLSL_IR_JUMP_DISCARD, ++ HLSL_IR_JUMP_DISCARD_NEG, ++ HLSL_IR_JUMP_DISCARD_NZ, + HLSL_IR_JUMP_RETURN, + }; + +@@ -566,6 +575,8 @@ struct hlsl_ir_jump + { + struct hlsl_ir_node node; + enum hlsl_ir_jump_type type; ++ /* Argument used for HLSL_IR_JUMP_DISCARD_NZ and HLSL_IR_JUMP_DISCARD_NEG. */ ++ struct hlsl_src condition; + }; + + struct hlsl_ir_swizzle +@@ -600,9 +611,11 @@ struct hlsl_deref + * components, within the pertaining regset), from the start of the variable, of the part + * referenced. + * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- +- * before writing the bytecode. */ ++ * before writing the bytecode. ++ * Since the type information cannot longer be retrieved from the offset alone, the type is ++ * stored in the data_type field. */ + struct hlsl_src offset; +- enum hlsl_regset offset_regset; ++ struct hlsl_type *data_type; + }; + + struct hlsl_ir_load +@@ -624,6 +637,8 @@ enum hlsl_resource_load_type + HLSL_RESOURCE_GATHER_GREEN, + HLSL_RESOURCE_GATHER_BLUE, + HLSL_RESOURCE_GATHER_ALPHA, ++ HLSL_RESOURCE_SAMPLE_INFO, ++ HLSL_RESOURCE_RESINFO, + }; + + struct hlsl_ir_resource_load +@@ -803,7 +818,11 @@ struct hlsl_ctx + * Only used for SM1 profiles. */ + struct hlsl_constant_defs + { +- struct hlsl_vec4 *values; ++ struct hlsl_constant_register ++ { ++ uint32_t index; ++ struct hlsl_vec4 value; ++ } *regs; + size_t count, size; + } constant_defs; + /* Number of temp. registers required for the shader to run, i.e. 
the largest temp register +@@ -1055,10 +1074,12 @@ const char *debug_hlsl_writemask(unsigned int writemask); + const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int count); + + struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type); ++struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, ++ unsigned int index); + struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); + const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); + +-struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, ++struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); + void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); + bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var); +@@ -1120,7 +1141,7 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond + struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, +- enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc); ++ enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); + + void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); + +@@ -1132,6 +1153,8 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls + const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *deref, 
unsigned int comp, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc); + + struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); + struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, +@@ -1156,6 +1179,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned in + struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); + struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, + struct hlsl_type *type, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, ++ struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope); + struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, + unsigned int sample_count); + struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format); +@@ -1187,6 +1212,8 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type); + unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, enum hlsl_regset regset); + struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl_type *type, + unsigned int index); ++unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, ++ enum hlsl_regset regset, unsigned int index); + bool hlsl_type_is_row_major(const struct hlsl_type *type); + unsigned int hlsl_type_minor_size(const struct hlsl_type *type); + unsigned int hlsl_type_major_size(const struct hlsl_type *type); +@@ -1227,7 +1254,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun + 
bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, + const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); + bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); ++ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); + int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); + + int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 0e07fe578e1..29e0ff0c5be 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -53,7 +53,7 @@ struct parse_initializer + { + struct hlsl_ir_node **args; + unsigned int args_count; +- struct list *instrs; ++ struct hlsl_block *instrs; + bool braces; + }; + +@@ -73,6 +73,10 @@ struct parse_variable_def + struct hlsl_semantic semantic; + struct hlsl_reg_reservation reg_reservation; + struct parse_initializer initializer; ++ ++ struct hlsl_type *basic_type; ++ unsigned int modifiers; ++ struct vkd3d_shader_location modifiers_loc; + }; + + struct parse_function +@@ -85,8 +89,8 @@ struct parse_function + + struct parse_if_body + { +- struct list *then_block; +- struct list *else_block; ++ struct hlsl_block *then_block; ++ struct hlsl_block *else_block; + }; + + enum parse_assign_op +@@ -129,9 +133,18 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "%s", s); + } + +-static struct hlsl_ir_node *node_from_list(struct list *list) ++static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) ++{ ++ return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); ++} ++ ++static struct hlsl_block 
*make_empty_block(struct hlsl_ctx *ctx) + { +- return LIST_ENTRY(list_tail(list), struct hlsl_ir_node, entry); ++ struct hlsl_block *block; ++ ++ if ((block = hlsl_alloc(ctx, sizeof(*block)))) ++ hlsl_block_init(block); ++ return block; + } + + static struct list *make_empty_list(struct hlsl_ctx *ctx) +@@ -143,10 +156,10 @@ static struct list *make_empty_list(struct hlsl_ctx *ctx) + return list; + } + +-static void destroy_instr_list(struct list *list) ++static void destroy_block(struct hlsl_block *block) + { +- hlsl_free_instr_list(list); +- vkd3d_free(list); ++ hlsl_block_cleanup(block); ++ vkd3d_free(block); + } + + static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct hlsl_type *src, +@@ -273,10 +286,7 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ + return hlsl_types_are_componentwise_equal(ctx, src, dst); + } + +-static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +- unsigned int comp, const struct vkd3d_shader_location *loc); +- +-static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) + { + struct hlsl_type *src_type = node->data_type; +@@ -313,7 +323,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + { + struct hlsl_ir_node *component_load; + struct hlsl_type *dst_comp_type; +- struct hlsl_block block; ++ struct hlsl_block store_block; + unsigned int src_idx; + + if (broadcast) +@@ -333,21 +343,21 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + + dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); + +- if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) ++ if (!(component_load = 
hlsl_add_load_component(ctx, block, node, src_idx, loc))) + return NULL; + + if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) + return NULL; +- list_add_tail(instrs, &cast->entry); ++ hlsl_block_add_instr(block, cast); + +- if (!hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, cast)) ++ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast)) + return NULL; +- list_move_tail(instrs, &block.instrs); ++ hlsl_block_add_block(block, &store_block); + } + + if (!(load = hlsl_new_var_load(ctx, var, loc))) + return NULL; +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(block, &load->node); + + return &load->node; + } +@@ -355,12 +365,12 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + { + if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) + return NULL; +- list_add_tail(instrs, &cast->entry); ++ hlsl_block_add_instr(block, cast); + return cast; + } + } + +-static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) + { + struct hlsl_type *src_type = node->data_type; +@@ -386,7 +396,7 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", + src_type->class == HLSL_CLASS_VECTOR ? 
"vector" : "matrix"); + +- return add_cast(ctx, instrs, node, dst_type, loc); ++ return add_cast(ctx, block, node, dst_type, loc); + } + + static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, +@@ -405,29 +415,29 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, + return modifiers | mod; + } + +-static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) ++static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) + { + struct hlsl_ir_node *condition, *not, *iff, *jump; + struct hlsl_block then_block; + + /* E.g. "for (i = 0; ; ++i)". */ +- if (list_empty(cond_list)) ++ if (list_empty(&cond_block->instrs)) + return true; + +- condition = node_from_list(cond_list); ++ condition = node_from_block(cond_block); + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) + return false; +- list_add_tail(cond_list, ¬->entry); ++ hlsl_block_add_instr(cond_block, not); + + hlsl_block_init(&then_block); + +- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &condition->loc))) + return false; + hlsl_block_add_instr(&then_block, jump); + + if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) + return false; +- list_add_tail(cond_list, &iff->entry); ++ hlsl_block_add_instr(cond_block, iff); + return true; + } + +@@ -454,10 +464,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att + return false; + } + +-static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct list *init, struct list *cond, +- struct list *iter, struct list *body, const struct vkd3d_shader_location *loc) ++static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, ++ const struct parse_attribute_list *attributes, struct hlsl_block *init, struct 
hlsl_block *cond, ++ struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) + { +- struct hlsl_block body_block; + struct hlsl_ir_node *loop; + unsigned int i; + +@@ -476,7 +486,7 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const + } + else + { +- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); ++ hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); + } + } + else if (!strcmp(attr->name, "loop") +@@ -491,38 +501,34 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const + } + } + +- if (!init && !(init = make_empty_list(ctx))) ++ if (!init && !(init = make_empty_block(ctx))) + goto oom; + + if (!append_conditional_break(ctx, cond)) + goto oom; + +- hlsl_block_init(&body_block); +- +- if (type != LOOP_DO_WHILE) +- list_move_tail(&body_block.instrs, cond); +- +- list_move_tail(&body_block.instrs, body); +- + if (iter) +- list_move_tail(&body_block.instrs, iter); ++ hlsl_block_add_block(body, iter); + + if (type == LOOP_DO_WHILE) +- list_move_tail(&body_block.instrs, cond); ++ list_move_tail(&body->instrs, &cond->instrs); ++ else ++ list_move_head(&body->instrs, &cond->instrs); + +- if (!(loop = hlsl_new_loop(ctx, &body_block, loc))) ++ if (!(loop = hlsl_new_loop(ctx, body, loc))) + goto oom; +- list_add_tail(init, &loop->entry); ++ hlsl_block_add_instr(init, loop); + +- vkd3d_free(cond); +- vkd3d_free(body); ++ destroy_block(cond); ++ destroy_block(body); ++ destroy_block(iter); + return init; + + oom: +- destroy_instr_list(init); +- destroy_instr_list(cond); +- destroy_instr_list(iter); +- destroy_instr_list(body); ++ destroy_block(init); ++ destroy_block(cond); ++ destroy_block(iter); ++ destroy_block(body); + return NULL; + } + +@@ -539,7 +545,7 @@ static unsigned int initializer_size(const struct parse_initializer *initializer + + static void 
free_parse_initializer(struct parse_initializer *initializer) + { +- destroy_instr_list(initializer->instrs); ++ destroy_block(initializer->instrs); + vkd3d_free(initializer->args); + } + +@@ -625,7 +631,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod + return NULL; + } + +-static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, ++static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) + { + struct hlsl_type *return_type = ctx->cur_function->return_type; +@@ -637,7 +643,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, + { + struct hlsl_ir_node *store; + +- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, loc))) ++ if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc))) + return false; + + if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) +@@ -656,18 +662,18 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); + } + +- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc))) + return false; +- list_add_tail(instrs, &jump->entry); ++ hlsl_block_add_instr(block, jump); + + return true; + } + +-static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +- unsigned int comp, const struct vkd3d_shader_location *loc) ++struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *load, *store; +- struct hlsl_block block; ++ struct hlsl_block load_block; + struct hlsl_ir_var *var; + struct hlsl_deref src; 
+ +@@ -676,17 +682,17 @@ static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list + + if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) + return NULL; +- list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); + + hlsl_init_simple_deref_from_var(&src, var); +- if (!(load = hlsl_new_load_component(ctx, &block, &src, comp, loc))) ++ if (!(load = hlsl_new_load_component(ctx, &load_block, &src, comp, loc))) + return NULL; +- list_move_tail(instrs, &block.instrs); ++ hlsl_block_add_block(block, &load_block); + + return load; + } + +-static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, ++static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *record, + unsigned int idx, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *index, *c; +@@ -695,20 +701,20 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct + + if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) + return false; +- list_add_tail(instrs, &c->entry); ++ hlsl_block_add_instr(block, c); + + if (!(index = hlsl_new_index(ctx, record, c, loc))) + return false; +- list_add_tail(instrs, &index->entry); ++ hlsl_block_add_instr(block, index); + + return true; + } + +-static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc); + +-static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, ++static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *array, + struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) + { + const struct hlsl_type *expr_type = array->data_type, 
*index_type = index->data_type; +@@ -731,13 +737,13 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h + return false; + } + +- if (!(index = add_implicit_conversion(ctx, instrs, index, ++ if (!(index = add_implicit_conversion(ctx, block, index, + hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) + return false; + + if (!(return_index = hlsl_new_index(ctx, array, index, loc))) + return false; +- list_add_tail(instrs, &return_index->entry); ++ hlsl_block_add_instr(block, return_index); + + return true; + } +@@ -750,7 +756,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h + + if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) + return false; +- list_add_tail(instrs, &cast->entry); ++ hlsl_block_add_instr(block, cast); + index = cast; + + if (expr_type->class != HLSL_CLASS_ARRAY && expr_type->class != HLSL_CLASS_VECTOR && expr_type->class != HLSL_CLASS_MATRIX) +@@ -764,7 +770,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h + + if (!(return_index = hlsl_new_index(ctx, array, index, loc))) + return false; +- list_add_tail(instrs, &return_index->entry); ++ hlsl_block_add_instr(block, return_index); + + return true; + } +@@ -830,6 +836,16 @@ static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) + return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; + } + ++static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) ++{ ++ return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); ++} ++ ++static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) ++{ ++ return !shader_profile_version_ge(ctx, major, minor); ++} ++ + static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + struct hlsl_type *type, 
unsigned int modifiers, struct list *defs) + { +@@ -1020,7 +1036,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const + struct hlsl_reg_reservation reservation = {0}; + char *endptr; + +- if (ctx->profile->major_version < 4) ++ if (shader_profile_version_lt(ctx, 4, 0)) + return reservation; + + reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); +@@ -1079,17 +1095,17 @@ static struct hlsl_ir_function_decl *get_func_decl(struct rb_tree *funcs, + return NULL; + } + +-static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) ++static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr) + { +- struct list *list; ++ struct hlsl_block *block; + +- if (!(list = make_empty_list(ctx))) ++ if (!(block = make_empty_block(ctx))) + { +- hlsl_free_instr(node); ++ hlsl_free_instr(instr); + return NULL; + } +- list_add_tail(list, &node->entry); +- return list; ++ hlsl_block_add_instr(block, instr); ++ return block; + } + + static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -1097,20 +1113,50 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str + { + struct hlsl_ir_constant *constant; + struct hlsl_ir_node *node; ++ struct hlsl_block expr; + unsigned int ret = 0; + bool progress; + +- if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), ++ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ switch (node->type) ++ { ++ case HLSL_IR_CONSTANT: ++ case HLSL_IR_EXPR: ++ case HLSL_IR_SWIZZLE: ++ case HLSL_IR_LOAD: ++ case HLSL_IR_INDEX: ++ continue; ++ case HLSL_IR_CALL: ++ case HLSL_IR_IF: ++ case HLSL_IR_LOOP: ++ case HLSL_IR_JUMP: ++ case HLSL_IR_RESOURCE_LOAD: ++ case HLSL_IR_RESOURCE_STORE: ++ case HLSL_IR_STORE: ++ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Expected literal expression."); ++ } ++ } ++ ++ if 
(!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) ++ return 0; ++ hlsl_block_add_block(&expr, block); ++ ++ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), + hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) ++ { ++ hlsl_block_cleanup(&expr); + return 0; ++ } + + do + { +- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); +- progress |= hlsl_copy_propagation_execute(ctx, block); ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); ++ progress |= hlsl_copy_propagation_execute(ctx, &expr); + } while (progress); + +- node = node_from_list(&block->instrs); ++ node = node_from_block(&expr); + if (node->type == HLSL_IR_CONSTANT) + { + constant = hlsl_ir_constant(node); +@@ -1119,9 +1165,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str + else + { + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +- "Failed to evaluate constant expression %d.", node->type); ++ "Failed to evaluate constant expression."); + } + ++ hlsl_block_cleanup(&expr); ++ + return ret; + } + +@@ -1253,7 +1301,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct + return true; + } + +-static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], + struct hlsl_type *type, const struct vkd3d_shader_location *loc) + { +@@ -1277,38 +1325,38 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, + for (i = 0; i < type->dimy * type->dimx; ++i) + { + struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; +- struct hlsl_block block; ++ struct hlsl_block store_block; + unsigned int j; + + for (j = 0; j < HLSL_MAX_OPERANDS; j++) + { + if (operands[j]) + { +- if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) ++ 
if (!(load = hlsl_add_load_component(ctx, block, operands[j], i, loc))) + return NULL; + + cell_operands[j] = load; + } + } + +- if (!(value = add_expr(ctx, instrs, op, cell_operands, scalar_type, loc))) ++ if (!(value = add_expr(ctx, block, op, cell_operands, scalar_type, loc))) + return NULL; + +- if (!hlsl_new_store_component(ctx, &block, &var_deref, i, value)) ++ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, value)) + return NULL; +- list_move_tail(instrs, &block.instrs); ++ hlsl_block_add_block(block, &store_block); + } + + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) + return NULL; +- list_add_tail(instrs, &var_load->node.entry); ++ hlsl_block_add_instr(block, &var_load->node); + + return &var_load->node; + } + + if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) + return NULL; +- list_add_tail(instrs, &expr->entry); ++ hlsl_block_add_instr(block, expr); + + return expr; + } +@@ -1334,23 +1382,23 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * + } + } + +-static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; + +- return add_expr(ctx, instrs, op, args, arg->data_type, loc); ++ return add_expr(ctx, block, op, args, arg->data_type, loc); + } + +-static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) + { + check_integer_type(ctx, arg); + +- return add_unary_arithmetic_expr(ctx, instrs, op, arg, loc); ++ return add_unary_arithmetic_expr(ctx, block, op, arg, loc); + } + 
+-static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; +@@ -1359,10 +1407,10 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct + bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, + arg->data_type->dimx, arg->data_type->dimy); + +- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) ++ if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) + return NULL; + +- return add_expr(ctx, instrs, op, args, bool_type, loc); ++ return add_expr(ctx, block, op, args, bool_type, loc); + } + + static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, +@@ -1378,7 +1426,7 @@ static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const str + return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + } + +-static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) + { +@@ -1387,49 +1435,26 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str + + common_type = get_common_numeric_type(ctx, arg1, arg2, loc); + +- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) ++ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) + return NULL; + +- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) ++ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, 
loc))) + return NULL; + +- return add_expr(ctx, instrs, op, args, common_type, loc); ++ return add_expr(ctx, block, op, args, common_type, loc); + } + +-static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); +- +- list_move_tail(list1, list2); +- vkd3d_free(list2); +- add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, loc); +- return list1; +-} +- +-static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) + { + check_integer_type(ctx, arg1); + check_integer_type(ctx, arg2); + +- return add_binary_arithmetic_expr(ctx, instrs, op, arg1, arg2, loc); +-} +- +-static struct list *add_binary_bitwise_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); +- +- list_move_tail(list1, list2); +- vkd3d_free(list2); +- add_binary_bitwise_expr(ctx, list1, op, arg1, arg2, loc); +- +- return list1; ++ return add_binary_arithmetic_expr(ctx, block, op, arg1, arg2, loc); + } + +-static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) + { +@@ -1445,27 +1470,16 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str + common_type = 
hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); + +- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) ++ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) + return NULL; + +- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) ++ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) + return NULL; + +- return add_expr(ctx, instrs, op, args, return_type, loc); ++ return add_expr(ctx, block, op, args, return_type, loc); + } + +-static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); +- +- list_move_tail(list1, list2); +- vkd3d_free(list2); +- add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); +- return list1; +-} +- +-static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) + { +@@ -1479,28 +1493,16 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct + + common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); + +- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) ++ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) + return NULL; + +- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) ++ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) + return NULL; + +- return add_expr(ctx, instrs, op, args, common_type, loc); ++ return add_expr(ctx, 
block, op, args, common_type, loc); + } + +-static struct list *add_binary_logical_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); +- +- list_move_tail(list1, list2); +- vkd3d_free(list2); +- add_binary_logical_expr(ctx, list1, op, arg1, arg2, loc); +- +- return list1; +-} +- +-static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) + { +@@ -1522,28 +1524,16 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l + return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy); + +- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, return_type, loc))) ++ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, return_type, loc))) + return NULL; + +- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, integer_type, loc))) ++ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, integer_type, loc))) + return NULL; + +- return add_expr(ctx, instrs, op, args, return_type, loc); +-} +- +-static struct list *add_binary_shift_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); +- +- list_move_tail(list1, list2); +- vkd3d_free(list2); +- add_binary_shift_expr(ctx, list1, op, arg1, arg2, loc); +- +- return list1; ++ return add_expr(ctx, block, op, args, return_type, loc); + } + +-static struct hlsl_ir_node *add_binary_dot_expr(struct 
hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) + { + enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); +@@ -1557,8 +1547,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, arg1->data_type))) +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Invalid type %s.\n", string->buffer); ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return NULL; + } +@@ -1568,8 +1557,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, arg2->data_type))) +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Invalid type %s.\n", string->buffer); ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return NULL; + } +@@ -1598,6 +1586,53 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis + return add_expr(ctx, instrs, op, args, ret_type, loc); + } + ++static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, ++ struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); ++ ++ hlsl_block_add_block(block1, block2); ++ destroy_block(block2); ++ ++ switch (op) ++ { ++ case HLSL_OP2_ADD: ++ case HLSL_OP2_DIV: ++ case HLSL_OP2_MOD: ++ case HLSL_OP2_MUL: ++ add_binary_arithmetic_expr(ctx, block1, op, arg1, arg2, loc); ++ break; ++ ++ 
case HLSL_OP2_BIT_AND: ++ case HLSL_OP2_BIT_OR: ++ case HLSL_OP2_BIT_XOR: ++ add_binary_bitwise_expr(ctx, block1, op, arg1, arg2, loc); ++ break; ++ ++ case HLSL_OP2_LESS: ++ case HLSL_OP2_GEQUAL: ++ case HLSL_OP2_EQUAL: ++ case HLSL_OP2_NEQUAL: ++ add_binary_comparison_expr(ctx, block1, op, arg1, arg2, loc); ++ break; ++ ++ case HLSL_OP2_LOGIC_AND: ++ case HLSL_OP2_LOGIC_OR: ++ add_binary_logical_expr(ctx, block1, op, arg1, arg2, loc); ++ break; ++ ++ case HLSL_OP2_LSHIFT: ++ case HLSL_OP2_RSHIFT: ++ add_binary_shift_expr(ctx, block1, op, arg1, arg2, loc); ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ return block1; ++} ++ + static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) + { + static const enum hlsl_ir_expr_op ops[] = +@@ -1654,7 +1689,7 @@ static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsig + return true; + } + +-static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *lhs, ++static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, + enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) + { + struct hlsl_type *lhs_type = lhs->data_type; +@@ -1663,7 +1698,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + + if (assign_op == ASSIGN_OP_SUB) + { +- if (!(rhs = add_unary_arithmetic_expr(ctx, instrs, HLSL_OP1_NEG, rhs, &rhs->loc))) ++ if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc))) + return NULL; + assign_op = ASSIGN_OP_ADD; + } +@@ -1672,14 +1707,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + enum hlsl_ir_expr_op op = op_from_assignment(assign_op); + + assert(op); +- if (!(rhs = add_binary_arithmetic_expr(ctx, instrs, op, lhs, rhs, &rhs->loc))) ++ if (!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) + return NULL; + } + + if (lhs_type->class <= 
HLSL_CLASS_LAST_NUMERIC) + writemask = (1 << lhs_type->dimx) - 1; + +- if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) ++ if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) + return NULL; + + while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) +@@ -1708,7 +1743,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + { + return NULL; + } +- list_add_tail(instrs, &new_swizzle->entry); ++ hlsl_block_add_instr(block, new_swizzle); + + lhs = swizzle->val.node; + rhs = new_swizzle; +@@ -1754,7 +1789,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + hlsl_cleanup_deref(&resource_deref); + return NULL; + } +- list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); + hlsl_cleanup_deref(&resource_deref); + } + else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) +@@ -1773,13 +1808,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + + if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) + return NULL; +- list_add_tail(instrs, &c->entry); ++ hlsl_block_add_instr(block, c); + + if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) + return NULL; +- list_add_tail(instrs, &cell->entry); ++ hlsl_block_add_instr(block, cell); + +- if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) ++ if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) + return NULL; + + if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) +@@ -1790,7 +1825,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + hlsl_cleanup_deref(&deref); + return NULL; + } +- list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); + hlsl_cleanup_deref(&deref); + } + } +@@ -1807,7 +1842,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + hlsl_cleanup_deref(&deref); + 
return NULL; + } +- list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); + hlsl_cleanup_deref(&deref); + } + +@@ -1816,14 +1851,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + * the last instruction in the list, we do need to copy. */ + if (!(copy = hlsl_new_copy(ctx, rhs))) + return NULL; +- list_add_tail(instrs, ©->entry); ++ hlsl_block_add_instr(block, copy); + return copy; + } + +-static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, ++static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post, + const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *lhs = node_from_list(instrs); ++ struct hlsl_ir_node *lhs = node_from_block(block); + struct hlsl_ir_node *one; + + if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) +@@ -1832,9 +1867,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem + + if (!(one = hlsl_new_int_constant(ctx, 1, loc))) + return false; +- list_add_tail(instrs, &one->entry); ++ hlsl_block_add_instr(block, one); + +- if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) ++ if (!add_assignment(ctx, block, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) + return false; + + if (post) +@@ -1843,7 +1878,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem + + if (!(copy = hlsl_new_copy(ctx, lhs))) + return false; +- list_add_tail(instrs, ©->entry); ++ hlsl_block_add_instr(block, copy); + + /* Post increment/decrement expressions are considered const. 
*/ + if (!(copy->data_type = hlsl_type_clone(ctx, copy->data_type, 0, HLSL_MODIFIER_CONST))) +@@ -1853,7 +1888,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem + return true; + } + +-static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, ++static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) + { + unsigned int src_comp_count = hlsl_type_component_count(src->data_type); +@@ -1868,7 +1903,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_type *dst_comp_type; + struct hlsl_block block; + +- if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) ++ if (!(load = hlsl_add_load_component(ctx, instrs, src, k, &src->loc))) + return; + + dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); +@@ -1878,7 +1913,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, + + if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) + return; +- list_move_tail(instrs, &block.instrs); ++ hlsl_block_add_block(instrs, &block); + + ++*store_index; + } +@@ -1924,211 +1959,231 @@ static bool type_has_numeric_components(struct hlsl_type *type) + return false; + } + +-static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, +- unsigned int modifiers, const struct vkd3d_shader_location *modifiers_loc, struct list *var_list) ++static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int modifiers, ++ const struct vkd3d_shader_location *loc) + { +- struct parse_variable_def *v, *v_next; ++ modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); ++ if (modifiers) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_modifiers_to_string(ctx, modifiers))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Modifiers 
'%s' are not allowed on non-parameter variables.", string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ } ++} ++ ++static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) ++{ ++ struct hlsl_type *basic_type = v->basic_type; + struct hlsl_ir_function_decl *func; +- unsigned int invalid_modifiers; +- struct list *statements_list; ++ struct hlsl_semantic new_semantic; ++ uint32_t modifiers = v->modifiers; ++ bool unbounded_res_array = false; + struct hlsl_ir_var *var; + struct hlsl_type *type; + bool local = true; ++ char *var_name; ++ unsigned int i; ++ ++ assert(basic_type); + + if (basic_type->class == HLSL_CLASS_MATRIX) + assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + +- if (!(statements_list = make_empty_list(ctx))) +- { +- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) +- free_parse_variable_def(v); +- vkd3d_free(var_list); +- return NULL; +- } +- +- if (!var_list) +- return statements_list; ++ type = basic_type; + +- invalid_modifiers = modifiers & (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); +- if (invalid_modifiers) ++ if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) + { +- struct vkd3d_string_buffer *string; +- +- if ((string = hlsl_modifiers_to_string(ctx, invalid_modifiers))) +- hlsl_error(ctx, modifiers_loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, +- "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); +- hlsl_release_string_buffer(ctx, string); ++ for (i = 0; i < v->arrays.count; ++i) ++ unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); + } + +- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) ++ if (unbounded_res_array) + { +- bool unbounded_res_array = false; +- unsigned int i; +- +- type = basic_type; +- +- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) ++ if (v->arrays.count == 1) + { +- for (i = 0; i < v->arrays.count; ++i) +- unbounded_res_array |= 
(v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); ++ hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); ++ return; + } +- +- if (unbounded_res_array) ++ else + { +- if (v->arrays.count == 1) +- { +- hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); +- free_parse_variable_def(v); +- continue; +- } +- else +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Unbounded resource arrays cannot be multi-dimensional."); +- } ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Unbounded resource arrays cannot be multi-dimensional."); + } +- else ++ } ++ else ++ { ++ for (i = 0; i < v->arrays.count; ++i) + { +- for (i = 0; i < v->arrays.count; ++i) ++ if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) + { +- if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) +- { +- unsigned int size = initializer_size(&v->initializer); +- unsigned int elem_components = hlsl_type_component_count(type); +- +- if (i < v->arrays.count - 1) +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Only innermost array size can be implicit."); +- free_parse_initializer(&v->initializer); +- v->initializer.args_count = 0; +- } +- else if (elem_components == 0) +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Cannot declare an implicit size array of a size 0 type."); +- free_parse_initializer(&v->initializer); +- v->initializer.args_count = 0; +- } +- else if (size == 0) +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Implicit size arrays need to be initialized."); +- free_parse_initializer(&v->initializer); +- v->initializer.args_count = 0; ++ unsigned int size = initializer_size(&v->initializer); ++ unsigned int elem_components = hlsl_type_component_count(type); + +- } +- else if (size % elem_components != 0) +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Cannot initialize implicit size array with %u 
components, expected a multiple of %u.", +- size, elem_components); +- free_parse_initializer(&v->initializer); +- v->initializer.args_count = 0; +- } +- else +- { +- v->arrays.sizes[i] = size / elem_components; +- } ++ if (i < v->arrays.count - 1) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Only innermost array size can be implicit."); ++ v->initializer.args_count = 0; ++ } ++ else if (elem_components == 0) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Cannot declare an implicit size array of a size 0 type."); ++ v->initializer.args_count = 0; ++ } ++ else if (size == 0) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Implicit size arrays need to be initialized."); ++ v->initializer.args_count = 0; ++ } ++ else if (size % elem_components != 0) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Cannot initialize implicit size array with %u components, expected a multiple of %u.", ++ size, elem_components); ++ v->initializer.args_count = 0; ++ } ++ else ++ { ++ v->arrays.sizes[i] = size / elem_components; + } +- type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); + } ++ type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); + } +- vkd3d_free(v->arrays.sizes); ++ } + +- if (!(var = hlsl_new_var(ctx, v->name, type, &v->loc, &v->semantic, modifiers, &v->reg_reservation))) ++ if (!(var_name = vkd3d_strdup(v->name))) ++ return; ++ ++ new_semantic = v->semantic; ++ if (v->semantic.name) ++ { ++ if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) + { +- free_parse_variable_def(v); +- continue; ++ vkd3d_free(var_name); ++ return; + } ++ } + +- var->buffer = ctx->cur_buffer; ++ if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) ++ { ++ hlsl_cleanup_semantic(&new_semantic); ++ vkd3d_free(var_name); ++ return; ++ } + +- if (var->buffer == ctx->globals_buffer) +- { +- if 
(var->reg_reservation.offset_type) +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, +- "packoffset() is only allowed inside constant buffer declarations."); +- } ++ var->buffer = ctx->cur_buffer; + +- if (ctx->cur_scope == ctx->globals) +- { +- local = false; ++ if (var->buffer == ctx->globals_buffer) ++ { ++ if (var->reg_reservation.offset_type) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "packoffset() is only allowed inside constant buffer declarations."); ++ } + +- if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, +- "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); ++ if (ctx->cur_scope == ctx->globals) ++ { ++ local = false; + +- /* Mark it as uniform. We need to do this here since synthetic +- * variables also get put in the global scope, but shouldn't be +- * considered uniforms, and we have no way of telling otherwise. */ +- if (!(modifiers & HLSL_STORAGE_STATIC)) +- var->storage_modifiers |= HLSL_STORAGE_UNIFORM; ++ if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); + +- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && +- type_has_object_components(var->data_type, true)) +- { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Target profile doesn't support objects as struct members in uniform variables.\n"); +- } ++ /* Mark it as uniform. We need to do this here since synthetic ++ * variables also get put in the global scope, but shouldn't be ++ * considered uniforms, and we have no way of telling otherwise. 
*/ ++ if (!(modifiers & HLSL_STORAGE_STATIC)) ++ var->storage_modifiers |= HLSL_STORAGE_UNIFORM; + +- if ((func = hlsl_get_func_decl(ctx, var->name))) +- { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, +- "'%s' is already defined as a function.", var->name); +- hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, +- "'%s' was previously defined here.", var->name); +- } +- } +- else ++ if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && ++ type_has_object_components(var->data_type, true)) + { +- static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED +- | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; +- +- if (modifiers & invalid) +- { +- struct vkd3d_string_buffer *string; ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Target profile doesn't support objects as struct members in uniform variables."); ++ } + +- if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, +- "Modifiers '%s' are not allowed on local variables.", string->buffer); +- hlsl_release_string_buffer(ctx, string); +- } +- if (var->semantic.name) +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, +- "Semantics are not allowed on local variables."); ++ if ((func = hlsl_get_func_decl(ctx, var->name))) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, ++ "'%s' is already defined as a function.", var->name); ++ hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, ++ "'%s' was previously defined here.", var->name); + } ++ } ++ else ++ { ++ static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED ++ | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; + +- if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) +- && type_has_object_components(var->data_type, false)) ++ if (modifiers & invalid) + { +- hlsl_error(ctx, 
&var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Static variables cannot have both numeric and resource components."); ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Modifiers '%s' are not allowed on local variables.", string->buffer); ++ hlsl_release_string_buffer(ctx, string); + } ++ if (var->semantic.name) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, ++ "Semantics are not allowed on local variables."); + +- if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count +- && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) ++ if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count && !(modifiers & HLSL_STORAGE_STATIC)) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, +- "Const variable \"%s\" is missing an initializer.", var->name); +- hlsl_free_var(var); +- free_parse_initializer(&v->initializer); +- vkd3d_free(v); +- continue; ++ "Const variable \"%s\" is missing an initializer.", var->name); + } ++ } ++ ++ if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) ++ && type_has_object_components(var->data_type, false)) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Static variables cannot have both numeric and resource components."); ++ } ++ ++ if (!hlsl_add_var(ctx, var, local)) ++ { ++ struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); + +- if (!hlsl_add_var(ctx, var, local)) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, ++ "Variable \"%s\" was already declared in this scope.", var->name); ++ hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); ++ hlsl_free_var(var); ++ return; ++ } ++} ++ ++static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list 
*var_list) ++{ ++ struct parse_variable_def *v, *v_next; ++ struct hlsl_block *initializers; ++ struct hlsl_ir_var *var; ++ struct hlsl_type *type; ++ ++ if (!(initializers = make_empty_block(ctx))) ++ { ++ LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) + { +- struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); ++ free_parse_variable_def(v); ++ } ++ vkd3d_free(var_list); ++ return NULL; ++ } + +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, +- "Variable \"%s\" was already declared in this scope.", var->name); +- hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); +- hlsl_free_var(var); +- free_parse_initializer(&v->initializer); +- vkd3d_free(v); ++ LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) ++ { ++ /* If this fails, the variable failed to be declared. */ ++ if (!(var = hlsl_get_var(ctx->cur_scope, v->name))) ++ { ++ free_parse_variable_def(v); + continue; + } ++ type = var->data_type; + + if (v->initializer.args_count) + { +@@ -2143,8 +2198,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u components in initializer, but got %u.", + hlsl_type_component_count(type), size); +- free_parse_initializer(&v->initializer); +- vkd3d_free(v); ++ free_parse_variable_def(v); + continue; + } + +@@ -2159,16 +2213,14 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); + + assert(v->initializer.args_count == 1); +- list_add_tail(v->initializer.instrs, &load->node.entry); ++ hlsl_block_add_instr(v->initializer.instrs, &load->node); + add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); + } + +- if (modifiers & HLSL_STORAGE_STATIC) +- 
list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); ++ if (var->storage_modifiers & HLSL_STORAGE_STATIC) ++ hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); + else +- list_move_tail(statements_list, v->initializer.instrs); +- vkd3d_free(v->initializer.args); +- vkd3d_free(v->initializer.instrs); ++ hlsl_block_add_block(initializers, v->initializer.instrs); + } + else if (var->storage_modifiers & HLSL_STORAGE_STATIC) + { +@@ -2178,34 +2230,35 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + + if (type_has_object_components(var->data_type, false)) + { +- vkd3d_free(v); ++ free_parse_variable_def(v); + continue; + } + + if (!(zero = hlsl_new_uint_constant(ctx, 0, &var->loc))) + { +- vkd3d_free(v); ++ free_parse_variable_def(v); + continue; + } + hlsl_block_add_instr(&ctx->static_initializers, zero); + +- if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) ++ if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) + { +- vkd3d_free(v); ++ free_parse_variable_def(v); + continue; + } + + if (!(store = hlsl_new_simple_store(ctx, var, cast))) + { +- vkd3d_free(v); ++ free_parse_variable_def(v); + continue; + } + hlsl_block_add_instr(&ctx->static_initializers, store); + } +- vkd3d_free(v); ++ free_parse_variable_def(v); + } ++ + vkd3d_free(var_list); +- return statements_list; ++ return initializers; + } + + struct find_function_call_args +@@ -2394,18 +2447,18 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, + + if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) + return false; +- list_add_tail(params->instrs, &one->entry); ++ hlsl_block_add_instr(params->instrs, one); + + if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) + return false; +- list_add_tail(params->instrs, &zero->entry); ++ hlsl_block_add_instr(params->instrs, zero); + + mul = one; + + count = hlsl_type_component_count(arg->data_type); + for (i 
= 0; i < count; ++i) + { +- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) + return false; + + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) +@@ -2431,7 +2484,7 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, + { + if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) + return false; +- list_add_tail(params->instrs, &zero->entry); ++ hlsl_block_add_instr(params->instrs, zero); + + if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) + return false; +@@ -2442,14 +2495,14 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, + { + if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) + return false; +- list_add_tail(params->instrs, &bfalse->entry); ++ hlsl_block_add_instr(params->instrs, bfalse); + + or = bfalse; + + count = hlsl_type_component_count(arg->data_type); + for (i = 0; i < count; ++i) + { +- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) + return false; + + if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) +@@ -2544,6 +2597,34 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc); + } + ++static bool intrinsic_clip(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *condition, *jump; ++ ++ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) ++ return false; ++ ++ condition = params->args[0]; ++ ++ if (ctx->profile->major_version < 4 && hlsl_type_component_count(condition->data_type) > 4) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, condition->data_type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ 
"Argument type cannot exceed 4 components, got type \"%s\".", string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return false; ++ } ++ ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NEG, condition, loc))) ++ return false; ++ hlsl_block_add_instr(params->instrs, jump); ++ ++ return true; ++} ++ + static bool intrinsic_cos(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -2579,26 +2660,26 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, + + if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) + return false; +- list_add_tail(params->instrs, &arg1_swzl1->entry); ++ hlsl_block_add_instr(params->instrs, arg1_swzl1); + + if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) + return false; +- list_add_tail(params->instrs, &arg2_swzl1->entry); ++ hlsl_block_add_instr(params->instrs, arg2_swzl1); + + if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) + return false; + + if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) + return false; +- list_add_tail(params->instrs, &mul1_neg->entry); ++ hlsl_block_add_instr(params->instrs, mul1_neg); + + if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) + return false; +- list_add_tail(params->instrs, &arg1_swzl2->entry); ++ hlsl_block_add_instr(params->instrs, arg1_swzl2); + + if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) + return false; +- list_add_tail(params->instrs, &arg2_swzl2->entry); ++ hlsl_block_add_instr(params->instrs, arg2_swzl2); + + if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) + return false; +@@ -2617,6 +2698,28 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); + } + 
++static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); ++} ++ ++static bool intrinsic_ddx_fine(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); ++} ++ + static bool intrinsic_ddy(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -2628,6 +2731,28 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); + } + ++static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); ++} ++ ++static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); ++} ++ + static bool intrinsic_distance(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -2668,7 +2793,7 @@ static bool intrinsic_exp(struct hlsl_ctx 
*ctx, + /* 1/ln(2) */ + if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) + return false; +- list_add_tail(params->instrs, &coeff->entry); ++ hlsl_block_add_instr(params->instrs, coeff); + + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) + return false; +@@ -2715,7 +2840,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer + + if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) + return false; +- list_add_tail(params->instrs, &zero->entry); ++ hlsl_block_add_instr(params->instrs, zero); + + if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) + return false; +@@ -2806,7 +2931,7 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, + } + + static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, +- struct list *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, ++ struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *log, *mul; +@@ -2861,15 +2986,15 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, + init_value.u[3].f = 1.0f; + if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) + return false; +- list_add_tail(params->instrs, &init->entry); ++ hlsl_block_add_instr(params->instrs, init); + + if (!(store = hlsl_new_simple_store(ctx, var, init))) + return false; +- list_add_tail(params->instrs, &store->entry); ++ hlsl_block_add_instr(params->instrs, store); + + if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) + return false; +- list_add_tail(params->instrs, &zero->entry); ++ hlsl_block_add_instr(params->instrs, zero); + + /* Diffuse component. 
*/ + if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) +@@ -2877,7 +3002,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, + + if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) + return false; +- list_move_tail(params->instrs, &block.instrs); ++ hlsl_block_add_block(params->instrs, &block); + + /* Specular component. */ + if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) +@@ -2897,11 +3022,11 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, + + if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) + return false; +- list_move_tail(params->instrs, &block.instrs); ++ hlsl_block_add_block(params->instrs, &block); + + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) + return false; +- list_add_tail(params->instrs, &var_load->node.entry); ++ hlsl_block_add_instr(params->instrs, &var_load->node); + + return true; + } +@@ -3034,10 +3159,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *value1, *value2, *mul; + +- if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) ++ if (!(value1 = hlsl_add_load_component(ctx, params->instrs, ++ cast1, j * cast1->data_type->dimx + k, loc))) + return false; + +- if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) ++ if (!(value2 = hlsl_add_load_component(ctx, params->instrs, ++ cast2, k * cast2->data_type->dimx + i, loc))) + return false; + + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) +@@ -3056,13 +3183,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + + if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) + return false; +- list_move_tail(params->instrs, &block.instrs); ++ hlsl_block_add_block(params->instrs, &block); + } + } + + if (!(load = hlsl_new_var_load(ctx, var, loc))) + return 
false; +- list_add_tail(params->instrs, &load->node.entry); ++ hlsl_block_add_instr(params->instrs, &load->node); + + return !!add_implicit_conversion(ctx, params->instrs, &load->node, ret_type, loc); + } +@@ -3169,7 +3296,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, + + if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) + return false; +- list_add_tail(params->instrs, &zero->entry); ++ hlsl_block_add_instr(params->instrs, zero); + + /* Check if 0 < arg, cast bool to int */ + +@@ -3229,7 +3356,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, + + if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) + return false; +- list_add_tail(params->instrs, &one->entry); ++ hlsl_block_add_instr(params->instrs, one); + + if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) + return false; +@@ -3242,11 +3369,11 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, + + if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) + return false; +- list_add_tail(params->instrs, &minus_two->entry); ++ hlsl_block_add_instr(params->instrs, minus_two); + + if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) + return false; +- list_add_tail(params->instrs, &three->entry); ++ hlsl_block_add_instr(params->instrs, three); + + if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) + return false; +@@ -3308,7 +3435,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + + if (params->args_count == 4) + { +- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented.\n"); ++ hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); + } + + sampler_type = params->args[0]->data_type; +@@ -3335,7 +3462,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; +- 
list_add_tail(params->instrs, &load->entry); ++ hlsl_block_add_instr(params->instrs, load); + return true; + } + +@@ -3369,7 +3496,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + + if ((string = hlsl_type_to_string(ctx, arg_type))) + hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.\n", ++ "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; +@@ -3377,7 +3504,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + + if (arg_type->class == HLSL_CLASS_SCALAR) + { +- list_add_tail(params->instrs, &arg->entry); ++ hlsl_block_add_instr(params->instrs, arg); + return true; + } + +@@ -3393,18 +3520,18 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + { + struct hlsl_block block; + +- if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) + return false; + + if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) + return false; +- list_move_tail(params->instrs, &block.instrs); ++ hlsl_block_add_block(params->instrs, &block); + } + } + + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) + return false; +- list_add_tail(params->instrs, &var_load->node.entry); ++ hlsl_block_add_instr(params->instrs, &var_load->node); + + return true; + } +@@ -3444,13 +3571,13 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + + if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) + return false; +- list_add_tail(params->instrs, &c->entry); ++ hlsl_block_add_instr(params->instrs, c); + + if (arg_type->class == HLSL_CLASS_VECTOR) + { + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) + return false; +- 
list_add_tail(params->instrs, &swizzle->entry); ++ hlsl_block_add_instr(params->instrs, swizzle); + + arg = swizzle; + } +@@ -3458,7 +3585,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) + return false; + +- if (ctx->profile->major_version >= 4) ++ if (shader_profile_version_ge(ctx, 4, 0)) + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); + + return true; +@@ -3482,10 +3609,15 @@ intrinsic_functions[] = + {"asfloat", 1, true, intrinsic_asfloat}, + {"asuint", -1, true, intrinsic_asuint}, + {"clamp", 3, true, intrinsic_clamp}, ++ {"clip", 1, true, intrinsic_clip}, + {"cos", 1, true, intrinsic_cos}, + {"cross", 2, true, intrinsic_cross}, + {"ddx", 1, true, intrinsic_ddx}, ++ {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, ++ {"ddx_fine", 1, true, intrinsic_ddx_fine}, + {"ddy", 1, true, intrinsic_ddy}, ++ {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, ++ {"ddy_fine", 1, true, intrinsic_ddy_fine}, + {"distance", 2, true, intrinsic_distance}, + {"dot", 2, true, intrinsic_dot}, + {"exp", 1, true, intrinsic_exp}, +@@ -3527,7 +3659,14 @@ static int intrinsic_function_name_compare(const void *a, const void *b) + return strcmp(a, func->name); + } + +-static struct list *add_call(struct hlsl_ctx *ctx, const char *name, ++static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ ++ return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); ++} ++ ++static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, + struct parse_initializer *args, const struct vkd3d_shader_location *loc) + { + struct intrinsic_function *intrinsic; +@@ -3561,13 +3700,13 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, + + if (!(store = hlsl_new_simple_store(ctx, param, arg))) + goto 
fail; +- list_add_tail(args->instrs, &store->entry); ++ hlsl_block_add_instr(args->instrs, store); + } + } + + if (!(call = hlsl_new_call(ctx, decl, loc))) + goto fail; +- list_add_tail(args->instrs, &call->entry); ++ hlsl_block_add_instr(args->instrs, call); + + for (i = 0; i < decl->parameters.count; ++i) + { +@@ -3584,7 +3723,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, + + if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) + goto fail; +- list_add_tail(args->instrs, &load->node.entry); ++ hlsl_block_add_instr(args->instrs, &load->node); + + if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) + goto fail; +@@ -3597,16 +3736,15 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, + + if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) + goto fail; +- list_add_tail(args->instrs, &load->node.entry); ++ hlsl_block_add_instr(args->instrs, &load->node); + } + else + { +- struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_node *expr; + +- if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc))) ++ if (!(expr = hlsl_new_void_expr(ctx, loc))) + goto fail; +- list_add_tail(args->instrs, &expr->entry); ++ hlsl_block_add_instr(args->instrs, expr); + } + } + else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), +@@ -3662,7 +3800,7 @@ fail: + return NULL; + } + +-static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, ++static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, + struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_load *load; +@@ -3692,7 +3830,7 @@ static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type + + if (!(load = hlsl_new_var_load(ctx, var, loc))) + return NULL; +- list_add_tail(params->instrs, &load->node.entry); ++ hlsl_block_add_instr(params->instrs, 
&load->node); + + vkd3d_free(params->args); + return params->instrs; +@@ -3733,7 +3871,7 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct + return false; + } + +-static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + const struct hlsl_type *object_type = object->data_type; +@@ -3761,7 +3899,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru + } + if (multisampled) + { +- if (!(load_params.sample_index = add_implicit_conversion(ctx, instrs, params->args[1], ++ if (!(load_params.sample_index = add_implicit_conversion(ctx, block, params->args[1], + hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) + return false; + } +@@ -3769,7 +3907,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru + assert(offset_dim); + if (params->args_count > 1 + multisampled) + { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[1 + multisampled], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } +@@ -3779,7 +3917,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru + } + + /* +1 for the mipmap level for non-multisampled textures */ +- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) + return false; + +@@ -3788,11 +3926,11 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru + + if (!(load = 
hlsl_new_resource_load(ctx, &load_params, loc))) + return false; +- list_add_tail(instrs, &load->entry); ++ hlsl_block_add_instr(block, load); + return true; + } + +-static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + const struct hlsl_type *object_type = object->data_type; +@@ -3829,13 +3967,13 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, st + return false; + } + +- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + + if (offset_dim && params->args_count > 2) + { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } +@@ -3851,12 +3989,12 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, st + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; +- list_add_tail(instrs, &load->entry); ++ hlsl_block_add_instr(block, load); + + return true; + } + +-static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + const struct hlsl_type *object_type = object->data_type; +@@ -3899,17 +4037,17 @@ static bool add_sample_cmp_method_call(struct 
hlsl_ctx *ctx, struct list *instrs + return false; + } + +- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + +- if (!(load_params.cmp = add_implicit_conversion(ctx, instrs, params->args[2], ++ if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], + hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) + load_params.cmp = params->args[2]; + + if (offset_dim && params->args_count > 3) + { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } +@@ -3925,12 +4063,12 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; +- list_add_tail(instrs, &load->entry); ++ hlsl_block_add_instr(block, load); + + return true; + } + +-static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + const struct hlsl_type *object_type = object->data_type; +@@ -3997,7 +4135,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st + } + else if (offset_dim && params->args_count > 2) + { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } +@@ -4022,7 +4160,7 @@ static 
bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st + return false; + } + +- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + +@@ -4032,11 +4170,187 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; +- list_add_tail(instrs, &load->entry); ++ hlsl_block_add_instr(block, load); ++ return true; ++} ++ ++static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest, ++ struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *load; ++ ++ if (!dest) ++ return true; ++ ++ if (!(load = hlsl_add_load_component(ctx, instrs, src, component, loc))) ++ return false; ++ ++ if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load)) ++ return false; ++ ++ return true; ++} ++ ++static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, ++ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ const struct hlsl_type *object_type = object->data_type; ++ bool uint_resinfo, has_uint_arg, has_float_arg; ++ struct hlsl_resource_load_params load_params; ++ struct hlsl_ir_node *sample_info, *res_info; ++ struct hlsl_ir_node *zero = NULL, *void_ret; ++ struct hlsl_type *uint_type, *float_type; ++ unsigned int i, j; ++ enum func_argument ++ { ++ ARG_MIP_LEVEL, ++ ARG_WIDTH, ++ ARG_HEIGHT, ++ ARG_ELEMENT_COUNT, ++ ARG_LEVEL_COUNT, ++ ARG_SAMPLE_COUNT, ++ ARG_MAX_ARGS, ++ }; ++ struct hlsl_ir_node *args[ARG_MAX_ARGS] = { 0 }; ++ static const struct overload ++ { ++ enum hlsl_sampler_dim sampler_dim; ++ unsigned int args_count; ++ enum 
func_argument args[ARG_MAX_ARGS]; ++ } ++ overloads[] = ++ { ++ { HLSL_SAMPLER_DIM_1D, 1, { ARG_WIDTH } }, ++ { HLSL_SAMPLER_DIM_1D, 3, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_1DARRAY, 2, { ARG_WIDTH, ARG_ELEMENT_COUNT } }, ++ { HLSL_SAMPLER_DIM_1DARRAY, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_2D, 2, { ARG_WIDTH, ARG_HEIGHT } }, ++ { HLSL_SAMPLER_DIM_2D, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_2DARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, ++ { HLSL_SAMPLER_DIM_2DARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_3D, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, ++ { HLSL_SAMPLER_DIM_3D, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_CUBE, 2, { ARG_WIDTH, ARG_HEIGHT } }, ++ { HLSL_SAMPLER_DIM_CUBE, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_CUBEARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, ++ { HLSL_SAMPLER_DIM_CUBEARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_2DMS, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_SAMPLE_COUNT } }, ++ { HLSL_SAMPLER_DIM_2DMSARRAY, 4, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_SAMPLE_COUNT } }, ++ }; ++ const struct overload *o = NULL; ++ ++ if (object_type->sampler_dim > HLSL_SAMPLER_DIM_LAST_TEXTURE) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "GetDimensions() is not defined for this type."); ++ } ++ ++ uint_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); ++ float_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT); ++ has_uint_arg = has_float_arg = false; ++ for (i = 0; i < ARRAY_SIZE(overloads); ++i) ++ { ++ const struct overload *iter = &overloads[i]; ++ ++ if (iter->sampler_dim == object_type->sampler_dim && iter->args_count == 
params->args_count) ++ { ++ for (j = 0; j < params->args_count; ++j) ++ { ++ args[iter->args[j]] = params->args[j]; ++ ++ /* Input parameter. */ ++ if (iter->args[j] == ARG_MIP_LEVEL) ++ { ++ if (!(args[ARG_MIP_LEVEL] = add_implicit_conversion(ctx, block, args[ARG_MIP_LEVEL], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) ++ { ++ return false; ++ } ++ ++ continue; ++ } ++ ++ has_float_arg |= hlsl_types_are_equal(params->args[j]->data_type, float_type); ++ has_uint_arg |= hlsl_types_are_equal(params->args[j]->data_type, uint_type); ++ ++ if (params->args[j]->data_type->class != HLSL_CLASS_SCALAR) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected scalar arguments."); ++ break; ++ } ++ } ++ o = iter; ++ break; ++ } ++ } ++ uint_resinfo = !has_float_arg && has_uint_arg; ++ ++ if (!o) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, object_type))) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Unexpected number of arguments %u for %s.%s().", params->args_count, string->buffer, name); ++ hlsl_release_string_buffer(ctx, string); ++ } ++ } ++ ++ if (!args[ARG_MIP_LEVEL]) ++ { ++ if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) ++ return false; ++ hlsl_block_add_instr(block, zero); ++ args[ARG_MIP_LEVEL] = zero; ++ } ++ ++ memset(&load_params, 0, sizeof(load_params)); ++ load_params.type = HLSL_RESOURCE_RESINFO; ++ load_params.resource = object; ++ load_params.lod = args[ARG_MIP_LEVEL]; ++ load_params.format = hlsl_get_vector_type(ctx, uint_resinfo ? 
HLSL_TYPE_UINT : HLSL_TYPE_FLOAT, 4); ++ ++ if (!(res_info = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ hlsl_block_add_instr(block, res_info); ++ ++ if (!add_assignment_from_component(ctx, block, args[ARG_WIDTH], res_info, 0, loc)) ++ return false; ++ ++ if (!add_assignment_from_component(ctx, block, args[ARG_HEIGHT], res_info, 1, loc)) ++ return false; ++ ++ if (!add_assignment_from_component(ctx, block, args[ARG_ELEMENT_COUNT], res_info, ++ object_type->sampler_dim == HLSL_SAMPLER_DIM_1DARRAY ? 1 : 2, loc)) ++ { ++ return false; ++ } ++ ++ if (!add_assignment_from_component(ctx, block, args[ARG_LEVEL_COUNT], res_info, 3, loc)) ++ return false; ++ ++ if (args[ARG_SAMPLE_COUNT]) ++ { ++ memset(&load_params, 0, sizeof(load_params)); ++ load_params.type = HLSL_RESOURCE_SAMPLE_INFO; ++ load_params.resource = object; ++ load_params.format = args[ARG_SAMPLE_COUNT]->data_type; ++ if (!(sample_info = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ hlsl_block_add_instr(block, sample_info); ++ ++ if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info)) ++ return false; ++ } ++ ++ if (!(void_ret = hlsl_new_void_expr(ctx, loc))) ++ return false; ++ hlsl_block_add_instr(block, void_ret); ++ + return true; + } + +-static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + const struct hlsl_type *object_type = object->data_type; +@@ -4078,17 +4392,17 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs + return false; + } + +- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], + 
hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.coords = params->args[1]; + +- if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], ++ if (!(load_params.lod = add_implicit_conversion(ctx, block, params->args[2], + hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) + load_params.lod = params->args[2]; + + if (offset_dim && params->args_count > 3) + { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } +@@ -4102,11 +4416,11 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; +- list_add_tail(instrs, &load->entry); ++ hlsl_block_add_instr(block, load); + return true; + } + +-static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + const struct hlsl_type *object_type = object->data_type; +@@ -4145,21 +4459,21 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instr + return false; + } + +- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.coords = params->args[1]; + +- if (!(load_params.ddx = add_implicit_conversion(ctx, instrs, params->args[2], ++ if (!(load_params.ddx = add_implicit_conversion(ctx, block, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + 
load_params.ddx = params->args[2]; + +- if (!(load_params.ddy = add_implicit_conversion(ctx, instrs, params->args[3], ++ if (!(load_params.ddy = add_implicit_conversion(ctx, block, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.ddy = params->args[3]; + + if (offset_dim && params->args_count > 4) + { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[4], ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[4], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } +@@ -4173,14 +4487,14 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instr + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; +- list_add_tail(instrs, &load->entry); ++ hlsl_block_add_instr(block, load); + return true; + } + + static const struct method_function + { + const char *name; +- bool (*handler)(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++ bool (*handler)(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); + } + object_methods[] = +@@ -4191,6 +4505,8 @@ object_methods[] = + { "GatherGreen", add_gather_method_call }, + { "GatherRed", add_gather_method_call }, + ++ { "GetDimensions", add_getdimensions_method_call }, ++ + { "Load", add_load_method_call }, + + { "Sample", add_sample_method_call }, +@@ -4208,7 +4524,7 @@ static int object_method_function_name_compare(const void *a, const void *b) + return strcmp(a, func->name); + } + +-static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + 
const struct hlsl_type *object_type = object->data_type; +@@ -4229,7 +4545,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl + if ((method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), + sizeof(*method), object_method_function_name_compare))) + { +- return method->handler(ctx, instrs, object, name, params, loc); ++ return method->handler(ctx, block, object, name, params, loc); + } + else + { +@@ -4272,6 +4588,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + char *name; + DWORD modifiers; + struct hlsl_ir_node *instr; ++ struct hlsl_block *block; + struct list *list; + struct parse_fields fields; + struct parse_function function; +@@ -4399,38 +4716,9 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + %token C_INTEGER + %token PRE_LINE + +-%type add_expr +-%type assignment_expr +-%type bitand_expr +-%type bitor_expr +-%type bitxor_expr +-%type compound_statement +-%type conditional_expr +-%type declaration +-%type declaration_statement +-%type discard_statement +-%type equality_expr +-%type expr +-%type expr_optional +-%type expr_statement +-%type initializer_expr +-%type jump_statement +-%type logicand_expr +-%type logicor_expr +-%type loop_statement +-%type mul_expr +-%type postfix_expr +-%type primary_expr +-%type relational_expr +-%type selection_statement +-%type shift_expr +-%type statement +-%type statement_list +-%type struct_declaration + %type type_specs +-%type unary_expr + %type variables_def +-%type variables_def_optional ++%type variables_def_typed + + %token VAR_IDENTIFIER + %token NEW_IDENTIFIER +@@ -4446,6 +4734,35 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + %type attribute_list + %type attribute_list_optional + ++%type add_expr ++%type assignment_expr ++%type bitand_expr ++%type bitor_expr ++%type bitxor_expr ++%type compound_statement ++%type conditional_expr ++%type declaration ++%type 
declaration_statement ++%type equality_expr ++%type expr ++%type expr_optional ++%type expr_statement ++%type initializer_expr ++%type jump_statement ++%type logicand_expr ++%type logicor_expr ++%type loop_statement ++%type mul_expr ++%type postfix_expr ++%type primary_expr ++%type relational_expr ++%type shift_expr ++%type selection_statement ++%type statement ++%type statement_list ++%type struct_declaration_without_vars ++%type unary_expr ++ + %type boolean + + %type buffer_type +@@ -4493,6 +4810,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + %type type_spec + %type variable_decl + %type variable_def ++%type variable_def_typed + + %% + +@@ -4502,9 +4820,9 @@ hlsl_prog: + | hlsl_prog buffer_declaration buffer_body + | hlsl_prog declaration_statement + { +- if (!list_empty($2)) ++ if (!list_empty(&$2->instrs)) + hlsl_fixme(ctx, &@2, "Uniform initializer."); +- destroy_instr_list($2); ++ destroy_block($2); + } + | hlsl_prog preproc_directive + | hlsl_prog ';' +@@ -4561,25 +4879,19 @@ preproc_directive: + } + } + +-struct_declaration: +- var_modifiers struct_spec variables_def_optional ';' ++struct_declaration_without_vars: ++ var_modifiers struct_spec ';' + { +- struct hlsl_type *type; +- unsigned int modifiers = $1; ++ if (!$2->name) ++ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Anonymous struct type must declare a variable."); + +- if (!$3) +- { +- if (!$2->name) +- hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +- "Anonymous struct type must declare a variable."); +- if (modifiers) +- hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, +- "Modifiers are not allowed on struct type declarations."); +- } ++ if ($1) ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Modifiers are not allowed on struct type declarations."); + +- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) ++ if (!($$ = make_empty_block(ctx))) + YYABORT; +- $$ = 
declare_vars(ctx, type, modifiers, &@1, $3); + } + + struct_spec: +@@ -4686,7 +4998,7 @@ attribute: + YYABORT; + } + $$->name = $2; +- list_init(&$$->instrs); ++ hlsl_block_init(&$$->instrs); + $$->loc = @$; + $$->args_count = 0; + } +@@ -4701,8 +5013,8 @@ attribute: + YYABORT; + } + $$->name = $2; +- list_init(&$$->instrs); +- list_move_tail(&$$->instrs, $4.instrs); ++ hlsl_block_init(&$$->instrs); ++ hlsl_block_add_block(&$$->instrs, $4.instrs); + vkd3d_free($4.instrs); + $$->loc = @$; + $$->args_count = $4.args_count; +@@ -4758,15 +5070,15 @@ func_declaration: + "Function \"%s\" is already defined.", decl->func->name); + hlsl_note(ctx, &decl->loc, VKD3D_SHADER_LOG_ERROR, + "\"%s\" was previously defined here.", decl->func->name); +- hlsl_free_instr_list($2); ++ destroy_block($2); + } + else + { + size_t i; + + decl->has_body = true; +- list_move_tail(&decl->body.instrs, $2); +- vkd3d_free($2); ++ hlsl_block_add_block(&decl->body, $2); ++ destroy_block($2); + + /* Semantics are taken from whichever definition has a body. 
+ * We can't just replace the hlsl_ir_var pointers, though: if +@@ -4943,7 +5255,7 @@ func_prototype: + compound_statement: + '{' '}' + { +- if (!($$ = make_empty_list(ctx))) ++ if (!($$ = make_empty_block(ctx))) + YYABORT; + } + | '{' scope_start statement_list '}' +@@ -5261,7 +5573,12 @@ type_no_void: + { + validate_texture_format_type(ctx, $3, &@3); + +- /* TODO: unspecified sample count is not allowed for all targets */ ++ if (shader_profile_version_lt(ctx, 4, 1)) ++ { ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); ++ } ++ + $$ = hlsl_new_texture_type(ctx, $1, $3, 0); + } + | texture_ms_type '<' type ',' shift_expr '>' +@@ -5270,7 +5587,7 @@ type_no_void: + struct hlsl_block block; + + hlsl_block_init(&block); +- list_move_tail(&block.instrs, $5); ++ hlsl_block_add_block(&block, $5); + + sample_count = evaluate_static_expression_as_uint(ctx, &block, &@5); + +@@ -5325,7 +5642,7 @@ type_no_void: + $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); + if ($$->is_minimum_precision) + { +- if (ctx->profile->major_version < 4) ++ if (shader_profile_version_lt(ctx, 4, 0)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Target profile doesn't support minimum-precision types."); +@@ -5354,10 +5671,10 @@ type: + + declaration_statement: + declaration +- | struct_declaration ++ | struct_declaration_without_vars + | typedef + { +- if (!($$ = make_empty_list(ctx))) ++ if (!($$ = make_empty_block(ctx))) + YYABORT; + } + +@@ -5416,23 +5733,12 @@ type_spec: + } + + declaration: +- var_modifiers type variables_def ';' ++ variables_def_typed ';' + { +- struct hlsl_type *type; +- unsigned int modifiers = $1; +- +- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) ++ if (!($$ = initialize_vars(ctx, $1))) + YYABORT; +- $$ = declare_vars(ctx, type, modifiers, &@1, $3); + } + +-variables_def_optional: +- %empty +- { +- $$ = NULL; 
+- } +- | variables_def +- + variables_def: + variable_def + { +@@ -5446,6 +5752,33 @@ variables_def: + list_add_tail($$, &$3->entry); + } + ++variables_def_typed: ++ variable_def_typed ++ { ++ if (!($$ = make_empty_list(ctx))) ++ YYABORT; ++ list_add_head($$, &$1->entry); ++ ++ declare_var(ctx, $1); ++ } ++ | variables_def_typed ',' variable_def ++ { ++ struct parse_variable_def *head_def; ++ ++ assert(!list_empty($1)); ++ head_def = LIST_ENTRY(list_head($1), struct parse_variable_def, entry); ++ ++ assert(head_def->basic_type); ++ $3->basic_type = head_def->basic_type; ++ $3->modifiers = head_def->modifiers; ++ $3->modifiers_loc = head_def->modifiers_loc; ++ ++ declare_var(ctx, $3); ++ ++ $$ = $1; ++ list_add_tail($$, &$3->entry); ++ } ++ + variable_decl: + any_identifier arrays colon_attribute + { +@@ -5461,7 +5794,7 @@ state: + any_identifier '=' expr ';' + { + vkd3d_free($1); +- hlsl_free_instr_list($3); ++ destroy_block($3); + } + + state_block_start: +@@ -5487,6 +5820,38 @@ variable_def: + ctx->in_state_block = 0; + } + ++variable_def_typed: ++ var_modifiers struct_spec variable_def ++ { ++ unsigned int modifiers = $1; ++ struct hlsl_type *type; ++ ++ if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) ++ YYABORT; ++ ++ check_invalid_in_out_modifiers(ctx, modifiers, &@1); ++ ++ $$ = $3; ++ $$->basic_type = type; ++ $$->modifiers = modifiers; ++ $$->modifiers_loc = @1; ++ } ++ | var_modifiers type variable_def ++ { ++ unsigned int modifiers = $1; ++ struct hlsl_type *type; ++ ++ if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) ++ YYABORT; ++ ++ check_invalid_in_out_modifiers(ctx, modifiers, &@1); ++ ++ $$ = $3; ++ $$->basic_type = type; ++ $$->modifiers = modifiers; ++ $$->modifiers_loc = @1; ++ } ++ + arrays: + %empty + { +@@ -5495,17 +5860,12 @@ arrays: + } + | '[' expr ']' arrays + { +- struct hlsl_block block; + uint32_t *new_array; + unsigned int size; + +- hlsl_clone_block(ctx, &block, &ctx->static_initializers); +- 
list_move_tail(&block.instrs, $2); +- +- size = evaluate_static_expression_as_uint(ctx, &block, &@2); ++ size = evaluate_static_expression_as_uint(ctx, $2, &@2); + +- hlsl_block_cleanup(&block); +- vkd3d_free($2); ++ destroy_block($2); + + $$ = $4; + +@@ -5618,10 +5978,10 @@ complex_initializer: + $$.args_count = 1; + if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) + { +- destroy_instr_list($1); ++ destroy_block($1); + YYABORT; + } +- $$.args[0] = node_from_list($1); ++ $$.args[0] = node_from_block($1); + $$.instrs = $1; + $$.braces = false; + } +@@ -5653,7 +6013,7 @@ complex_initializer_list: + $$.args = new_args; + for (i = 0; i < $3.args_count; ++i) + $$.args[$$.args_count++] = $3.args[i]; +- list_move_tail($$.instrs, $3.instrs); ++ hlsl_block_add_block($$.instrs, $3.instrs); + free_parse_initializer(&$3); + } + +@@ -5666,10 +6026,10 @@ initializer_expr_list: + $$.args_count = 1; + if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) + { +- destroy_instr_list($1); ++ destroy_block($1); + YYABORT; + } +- $$.args[0] = node_from_list($1); ++ $$.args[0] = node_from_block($1); + $$.instrs = $1; + $$.braces = false; + } +@@ -5681,13 +6041,13 @@ initializer_expr_list: + if (!(new_args = hlsl_realloc(ctx, $$.args, ($$.args_count + 1) * sizeof(*$$.args)))) + { + free_parse_initializer(&$$); +- destroy_instr_list($3); ++ destroy_block($3); + YYABORT; + } + $$.args = new_args; +- $$.args[$$.args_count++] = node_from_list($3); +- list_move_tail($$.instrs, $3); +- vkd3d_free($3); ++ $$.args[$$.args_count++] = node_from_block($3); ++ hlsl_block_add_block($$.instrs, $3); ++ destroy_block($3); + } + + boolean: +@@ -5705,15 +6065,14 @@ statement_list: + | statement_list statement + { + $$ = $1; +- list_move_tail($$, $2); +- vkd3d_free($2); ++ hlsl_block_add_block($$, $2); ++ destroy_block($2); + } + + statement: + declaration_statement + | expr_statement + | compound_statement +- | discard_statement + | jump_statement + | selection_statement + | loop_statement +@@ -5721,47 
+6080,47 @@ statement: + jump_statement: + KW_RETURN expr ';' + { +- if (!add_return(ctx, $2, node_from_list($2), &@1)) +- YYABORT; + $$ = $2; ++ if (!add_return(ctx, $$, node_from_block($$), &@1)) ++ YYABORT; + } + | KW_RETURN ';' + { +- if (!($$ = make_empty_list(ctx))) ++ if (!($$ = make_empty_block(ctx))) + YYABORT; + if (!add_return(ctx, $$, NULL, &@1)) + YYABORT; + } +- +-discard_statement: +- KW_DISCARD ';' ++ | KW_DISCARD ';' + { +- struct hlsl_ir_node *discard; ++ struct hlsl_ir_node *discard, *c; + +- if (!($$ = make_empty_list(ctx))) ++ if (!($$ = make_empty_block(ctx))) + YYABORT; +- if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) ++ ++ if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) + return false; +- list_add_tail($$, &discard->entry); ++ hlsl_block_add_instr($$, c); ++ ++ if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) ++ return false; ++ hlsl_block_add_instr($$, discard); + } + + selection_statement: + KW_IF '(' expr ')' if_body + { +- struct hlsl_ir_node *condition = node_from_list($3); +- struct hlsl_block then_block, else_block; ++ struct hlsl_ir_node *condition = node_from_block($3); + struct hlsl_ir_node *instr; + +- hlsl_block_init(&then_block); +- list_move_tail(&then_block.instrs, $5.then_block); +- hlsl_block_init(&else_block); +- if ($5.else_block) +- list_move_tail(&else_block.instrs, $5.else_block); +- vkd3d_free($5.then_block); +- vkd3d_free($5.else_block); +- +- if (!(instr = hlsl_new_if(ctx, condition, &then_block, &else_block, &@1))) ++ if (!(instr = hlsl_new_if(ctx, condition, $5.then_block, $5.else_block, &@1))) ++ { ++ destroy_block($5.then_block); ++ destroy_block($5.else_block); + YYABORT; ++ } ++ destroy_block($5.then_block); ++ destroy_block($5.else_block); + if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) + { + struct vkd3d_string_buffer *string; +@@ -5772,7 +6131,7 @@ selection_statement: + hlsl_release_string_buffer(ctx, string); + } + $$ = $3; +- 
list_add_tail($$, &instr->entry); ++ hlsl_block_add_instr($$, instr); + } + + if_body: +@@ -5810,7 +6169,7 @@ loop_statement: + expr_optional: + %empty + { +- if (!($$ = make_empty_list(ctx))) ++ if (!($$ = make_empty_block(ctx))) + YYABORT; + } + | expr +@@ -5826,7 +6185,7 @@ func_arguments: + { + $$.args = NULL; + $$.args_count = 0; +- if (!($$.instrs = make_empty_list(ctx))) ++ if (!($$.instrs = make_empty_block(ctx))) + YYABORT; + $$.braces = false; + } +@@ -5839,7 +6198,7 @@ primary_expr: + + if (!(c = hlsl_new_float_constant(ctx, $1, &@1))) + YYABORT; +- if (!($$ = make_list(ctx, c))) ++ if (!($$ = make_block(ctx, c))) + YYABORT; + } + | C_INTEGER +@@ -5848,7 +6207,7 @@ primary_expr: + + if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) + YYABORT; +- if (!($$ = make_list(ctx, c))) ++ if (!($$ = make_block(ctx, c))) + YYABORT; + } + | boolean +@@ -5857,7 +6216,7 @@ primary_expr: + + if (!(c = hlsl_new_bool_constant(ctx, $1, &@1))) + YYABORT; +- if (!($$ = make_list(ctx, c))) ++ if (!($$ = make_block(ctx, c))) + { + hlsl_free_instr(c); + YYABORT; +@@ -5875,7 +6234,7 @@ primary_expr: + } + if (!(load = hlsl_new_var_load(ctx, var, &@1))) + YYABORT; +- if (!($$ = make_list(ctx, &load->node))) ++ if (!($$ = make_block(ctx, &load->node))) + YYABORT; + } + | '(' expr ')' +@@ -5903,7 +6262,7 @@ primary_expr: + YYABORT; + if (!(load = hlsl_new_var_load(ctx, var, &@1))) + YYABORT; +- if (!($$ = make_list(ctx, &load->node))) ++ if (!($$ = make_block(ctx, &load->node))) + YYABORT; + } + else +@@ -5919,7 +6278,7 @@ postfix_expr: + { + if (!add_increment(ctx, $1, false, true, &@2)) + { +- destroy_instr_list($1); ++ destroy_block($1); + YYABORT; + } + $$ = $1; +@@ -5928,14 +6287,14 @@ postfix_expr: + { + if (!add_increment(ctx, $1, true, true, &@2)) + { +- destroy_instr_list($1); ++ destroy_block($1); + YYABORT; + } + $$ = $1; + } + | postfix_expr '.' 
any_identifier + { +- struct hlsl_ir_node *node = node_from_list($1); ++ struct hlsl_ir_node *node = node_from_block($1); + + if (node->data_type->class == HLSL_CLASS_STRUCT) + { +@@ -5963,7 +6322,7 @@ postfix_expr: + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3); + YYABORT; + } +- list_add_tail($1, &swizzle->entry); ++ hlsl_block_add_instr($1, swizzle); + $$ = $1; + } + else +@@ -5974,17 +6333,17 @@ postfix_expr: + } + | postfix_expr '[' expr ']' + { +- struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); ++ struct hlsl_ir_node *array = node_from_block($1), *index = node_from_block($3); + +- list_move_head($1, $3); +- vkd3d_free($3); ++ hlsl_block_add_block($3, $1); ++ destroy_block($1); + +- if (!add_array_access(ctx, $1, array, index, &@2)) ++ if (!add_array_access(ctx, $3, array, index, &@2)) + { +- destroy_instr_list($1); ++ destroy_block($3); + YYABORT; + } +- $$ = $1; ++ $$ = $3; + } + + /* var_modifiers is necessary to avoid shift/reduce conflicts. */ +@@ -6025,14 +6384,14 @@ postfix_expr: + } + | postfix_expr '.' 
any_identifier '(' func_arguments ')' + { +- struct hlsl_ir_node *object = node_from_list($1); ++ struct hlsl_ir_node *object = node_from_block($1); + +- list_move_tail($1, $5.instrs); ++ hlsl_block_add_block($1, $5.instrs); + vkd3d_free($5.instrs); + + if (!add_method_call(ctx, $1, object, $3, &$5, &@3)) + { +- hlsl_free_instr_list($1); ++ destroy_block($1); + vkd3d_free($5.args); + YYABORT; + } +@@ -6046,7 +6405,7 @@ unary_expr: + { + if (!add_increment(ctx, $2, false, false, &@1)) + { +- destroy_instr_list($2); ++ destroy_block($2); + YYABORT; + } + $$ = $2; +@@ -6055,7 +6414,7 @@ unary_expr: + { + if (!add_increment(ctx, $2, true, false, &@1)) + { +- destroy_instr_list($2); ++ destroy_block($2); + YYABORT; + } + $$ = $2; +@@ -6066,23 +6425,23 @@ unary_expr: + } + | '-' unary_expr + { +- add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_list($2), &@1); ++ add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_block($2), &@1); + $$ = $2; + } + | '~' unary_expr + { +- add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_list($2), &@1); ++ add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_block($2), &@1); + $$ = $2; + } + | '!' unary_expr + { +- add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_list($2), &@1); ++ add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_block($2), &@1); + $$ = $2; + } + /* var_modifiers is necessary to avoid shift/reduce conflicts. 
*/ + | '(' var_modifiers type arrays ')' unary_expr + { +- struct hlsl_type *src_type = node_from_list($6)->data_type; ++ struct hlsl_type *src_type = node_from_block($6)->data_type; + struct hlsl_type *dst_type; + unsigned int i; + +@@ -6118,9 +6477,9 @@ unary_expr: + YYABORT; + } + +- if (!add_cast(ctx, $6, node_from_list($6), dst_type, &@3)) ++ if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3)) + { +- hlsl_free_instr_list($6); ++ destroy_block($6); + YYABORT; + } + $$ = $6; +@@ -6130,120 +6489,121 @@ mul_expr: + unary_expr + | mul_expr '*' unary_expr + { +- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); + } + | mul_expr '/' unary_expr + { +- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); + } + | mul_expr '%' unary_expr + { +- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); + } + + add_expr: + mul_expr + | add_expr '+' mul_expr + { +- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); + } + | add_expr '-' mul_expr + { + struct hlsl_ir_node *neg; + +- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), &@2))) ++ if (!(neg = add_unary_arithmetic_expr(ctx, $3, HLSL_OP1_NEG, node_from_block($3), &@2))) + YYABORT; +- list_add_tail($3, &neg->entry); +- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); + } + + shift_expr: + add_expr + | shift_expr OP_LEFTSHIFT add_expr + { +- $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); + } + | shift_expr OP_RIGHTSHIFT add_expr + { +- $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); 
++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); + } + + relational_expr: + shift_expr + | relational_expr '<' shift_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); + } + | relational_expr '>' shift_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); ++ $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); + } + | relational_expr OP_LE shift_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); ++ $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); + } + | relational_expr OP_GE shift_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); + } + + equality_expr: + relational_expr + | equality_expr OP_EQ relational_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); + } + | equality_expr OP_NE relational_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); + } + + bitand_expr: + equality_expr + | bitand_expr '&' equality_expr + { +- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); + } + + bitxor_expr: + bitand_expr + | bitxor_expr '^' bitand_expr + { +- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); + } + + bitor_expr: + bitxor_expr + | bitor_expr '|' bitxor_expr + { +- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); + } + + logicand_expr: + bitor_expr + | logicand_expr OP_AND bitor_expr + { +- $$ = 
add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); + } + + logicor_expr: + logicand_expr + | logicor_expr OP_OR logicand_expr + { +- $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); + } + + conditional_expr: + logicor_expr + | logicor_expr '?' expr ':' assignment_expr + { +- struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_list($3), *second = node_from_list($5); ++ struct hlsl_ir_node *cond = node_from_block($1); ++ struct hlsl_ir_node *first = node_from_block($3); ++ struct hlsl_ir_node *second = node_from_block($5); + struct hlsl_type *common_type; + +- list_move_tail($1, $3); +- list_move_tail($1, $5); +- vkd3d_free($3); +- vkd3d_free($5); ++ hlsl_block_add_block($1, $3); ++ hlsl_block_add_block($1, $5); ++ destroy_block($3); ++ destroy_block($5); + + if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) + YYABORT; +@@ -6264,15 +6624,15 @@ assignment_expr: + conditional_expr + | unary_expr assign_op assignment_expr + { +- struct hlsl_ir_node *lhs = node_from_list($1), *rhs = node_from_list($3); ++ struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3); + + if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); + YYABORT; + } +- list_move_tail($3, $1); +- vkd3d_free($1); ++ hlsl_block_add_block($3, $1); ++ destroy_block($1); + if (!add_assignment(ctx, $3, lhs, $2, rhs)) + YYABORT; + $$ = $3; +@@ -6329,6 +6689,6 @@ expr: + | expr ',' assignment_expr + { + $$ = $1; +- list_move_tail($$, $3); +- vkd3d_free($3); ++ hlsl_block_add_block($$, $3); ++ destroy_block($3); + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 765b1907426..bfa605f4ba7 100644 +--- 
a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -97,6 +97,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str + static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *deref, const struct vkd3d_shader_location *loc) + { ++ enum hlsl_regset regset = hlsl_type_get_regset(deref->data_type); + struct hlsl_ir_node *offset = NULL; + struct hlsl_type *type; + unsigned int i; +@@ -111,7 +112,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st + struct hlsl_block idx_block; + + if (!(offset = new_offset_from_path_index(ctx, &idx_block, type, offset, deref->path[i].node, +- deref->offset_regset, loc))) ++ regset, loc))) + return NULL; + + hlsl_block_add_block(block, &idx_block); +@@ -126,7 +127,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st + static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, + struct hlsl_ir_node *instr) + { +- const struct hlsl_type *type; ++ struct hlsl_type *type; + struct hlsl_ir_node *offset; + struct hlsl_block block; + +@@ -145,7 +146,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der + return true; + } + +- deref->offset_regset = hlsl_type_get_regset(type); ++ deref->data_type = type; + + if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) + return false; +@@ -160,7 +161,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der + /* Split uniforms into two variables representing the constant and temp + * registers, and copy the former to the latter, so that writes to uniforms + * work. 
*/ +-static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *temp) ++static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *temp) + { + struct vkd3d_string_buffer *name; + struct hlsl_ir_var *uniform; +@@ -187,7 +188,7 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru + + if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) + return; +- list_add_head(instrs, &load->node.entry); ++ list_add_head(&block->instrs, &load->node.entry); + + if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) + return; +@@ -300,7 +301,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + return ext_var; + } + +-static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, ++static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; +@@ -363,7 +364,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct + } + } + +-static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, ++static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct vkd3d_shader_location *loc = &lhs->node.loc; +@@ -405,30 +406,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs + return; + list_add_after(&c->entry, &element_load->node.entry); + +- prepend_input_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); ++ prepend_input_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); + } + } 
+ else + { +- prepend_input_copy(ctx, instrs, lhs, modifiers, semantic, semantic_index); ++ prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); + } + } + + /* Split inputs into two variables representing the semantic and temp registers, + * and copy the former to the latter, so that writes to input variables work. */ +-static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) ++static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) + { + struct hlsl_ir_load *load; + + /* This redundant load is expected to be deleted later by DCE. */ + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) + return; +- list_add_head(instrs, &load->node.entry); ++ list_add_head(&block->instrs, &load->node.entry); + +- prepend_input_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); + } + +-static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, ++static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct hlsl_type *type = rhs->node.data_type, *vector_type; +@@ -463,11 +464,11 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct + { + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; +- list_add_tail(instrs, &c->entry); ++ hlsl_block_add_instr(block, c); + + if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) + return; +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(block, &load->node); + } + else + { +@@ -475,16 +476,16 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct + + if (!(load = hlsl_new_load_index(ctx, &rhs->src, 
NULL, &var->loc))) + return; +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(block, &load->node); + } + + if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) + return; +- list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); + } + } + +-static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, ++static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct vkd3d_shader_location *loc = &rhs->node.loc; +@@ -519,34 +520,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs + + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; +- list_add_tail(instrs, &c->entry); ++ hlsl_block_add_instr(block, c); + + if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) + return; +- list_add_tail(instrs, &element_load->node.entry); ++ hlsl_block_add_instr(block, &element_load->node); + +- append_output_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); ++ append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { +- append_output_copy(ctx, instrs, rhs, modifiers, semantic, semantic_index); ++ append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); + } + } + + /* Split outputs into two variables representing the temp and semantic + * registers, and copy the former to the latter, so that reads from output + * variables work. */ +-static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) ++static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) + { + struct hlsl_ir_load *load; + + /* This redundant load is expected to be deleted later by DCE. 
*/ + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) + return; +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(block, &load->node); + +- append_output_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); + } + + bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), +@@ -573,6 +574,37 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, + return progress; + } + ++typedef bool (*PFN_lower_func)(struct hlsl_ctx *, struct hlsl_ir_node *, struct hlsl_block *); ++ ++static bool call_lower_func(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ PFN_lower_func func = context; ++ struct hlsl_block block; ++ ++ hlsl_block_init(&block); ++ if (func(ctx, instr, &block)) ++ { ++ struct hlsl_ir_node *replacement = LIST_ENTRY(list_tail(&block.instrs), struct hlsl_ir_node, entry); ++ ++ list_move_before(&instr->entry, &block.instrs); ++ hlsl_replace_node(instr, replacement); ++ return true; ++ } ++ else ++ { ++ hlsl_block_cleanup(&block); ++ return false; ++ } ++} ++ ++/* Specific form of transform_ir() for passes which convert a single instruction ++ * to a block of one or more instructions. This helper takes care of setting up ++ * the block and calling hlsl_replace_node_with_block(). 
*/ ++static bool lower_ir(struct hlsl_ctx *ctx, PFN_lower_func func, struct hlsl_block *block) ++{ ++ return hlsl_transform_ir(ctx, call_lower_func, block, func); ++} ++ + static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + bool res; +@@ -666,7 +698,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, + return; + list_add_after(&cf_instr->entry, &load->node.entry); + +- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc))) + return; + hlsl_block_add_instr(&then_block, jump); + +@@ -1689,7 +1721,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ + { + struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + +- if (!(load->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ if (!load->resource.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Loaded resource must have a single uniform source."); +@@ -1704,7 +1736,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ + + if (load->sampler.var) + { +- if (!(load->sampler.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ if (!load->sampler.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Resource load sampler must have a single uniform source."); +@@ -1722,7 +1754,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ + { + struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); + +- if (!(store->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ if (!store->resource.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Accessed resource must have a single uniform source."); +@@ -1889,7 +1921,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node 
*instr + + if (rhs->type != HLSL_IR_LOAD) + { +- hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.\n"); ++ hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type."); + return false; + } + +@@ -2066,6 +2098,137 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir + return false; + } + ++/* Lower combined samples and sampler variables to synthesized separated textures and samplers. ++ * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ ++static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_resource_load *load; ++ struct vkd3d_string_buffer *name; ++ struct hlsl_ir_var *var; ++ unsigned int i; ++ ++ if (instr->type != HLSL_IR_RESOURCE_LOAD) ++ return false; ++ load = hlsl_ir_resource_load(instr); ++ ++ switch (load->load_type) ++ { ++ case HLSL_RESOURCE_LOAD: ++ case HLSL_RESOURCE_GATHER_RED: ++ case HLSL_RESOURCE_GATHER_GREEN: ++ case HLSL_RESOURCE_GATHER_BLUE: ++ case HLSL_RESOURCE_GATHER_ALPHA: ++ case HLSL_RESOURCE_RESINFO: ++ case HLSL_RESOURCE_SAMPLE_CMP: ++ case HLSL_RESOURCE_SAMPLE_CMP_LZ: ++ case HLSL_RESOURCE_SAMPLE_GRAD: ++ case HLSL_RESOURCE_SAMPLE_INFO: ++ return false; ++ ++ case HLSL_RESOURCE_SAMPLE: ++ case HLSL_RESOURCE_SAMPLE_LOD: ++ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: ++ break; ++ } ++ if (load->sampler.var) ++ return false; ++ ++ if (!hlsl_type_is_resource(load->resource.var->data_type)) ++ { ++ hlsl_fixme(ctx, &instr->loc, "Lower combined samplers within structs."); ++ return false; ++ } ++ ++ assert(hlsl_type_get_regset(load->resource.var->data_type) == HLSL_REGSET_SAMPLERS); ++ ++ if (!(name = hlsl_get_string_buffer(ctx))) ++ return false; ++ vkd3d_string_buffer_printf(name, "%s", load->resource.var->name); ++ ++ TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer)); ++ ++ if (!(var = hlsl_get_var(ctx->globals, name->buffer))) ++ { ++ struct hlsl_type 
*texture_array_type = hlsl_new_texture_type(ctx, load->sampling_dim, ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); ++ ++ /* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. */ ++ struct hlsl_type *arr_type = load->resource.var->data_type; ++ for (i = 0; i < load->resource.path_len; ++i) ++ { ++ assert(arr_type->class == HLSL_CLASS_ARRAY); ++ texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); ++ arr_type = arr_type->e.array.type; ++ } ++ ++ if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, texture_array_type, &instr->loc, false))) ++ { ++ hlsl_release_string_buffer(ctx, name); ++ return false; ++ } ++ var->is_uniform = 1; ++ var->is_separated_resource = true; ++ ++ list_add_tail(&ctx->extern_vars, &var->extern_entry); ++ } ++ hlsl_release_string_buffer(ctx, name); ++ ++ if (load->sampling_dim != var->data_type->sampler_dim) ++ { ++ hlsl_error(ctx, &load->node.loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, ++ "Cannot split combined samplers from \"%s\" if they have different usage dimensions.", ++ load->resource.var->name); ++ hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, "First use as combined sampler is here."); ++ return false; ++ ++ } ++ ++ hlsl_copy_deref(ctx, &load->sampler, &load->resource); ++ load->resource.var = var; ++ assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); ++ assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); ++ ++ return true; ++} ++ ++static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl_ir_var *to_add, ++ enum hlsl_regset regset) ++{ ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(var, list, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->bind_count[regset] < to_add->bind_count[regset]) ++ { ++ list_add_before(&var->extern_entry, &to_add->extern_entry); ++ return; ++ } ++ } ++ ++ list_add_tail(list, &to_add->extern_entry); 
++} ++ ++static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) ++{ ++ struct list separated_resources; ++ struct hlsl_ir_var *var, *next; ++ ++ list_init(&separated_resources); ++ ++ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_separated_resource) ++ { ++ list_remove(&var->extern_entry); ++ insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_TEXTURES); ++ } ++ } ++ ++ list_move_head(&ctx->extern_vars, &separated_resources); ++ ++ return false; ++} ++ + /* Lower DIV to RCP + MUL. */ + static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { +@@ -2264,7 +2427,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + return true; + } + +-struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, ++struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) + { + struct hlsl_block then_block, else_block; +@@ -2290,18 +2453,18 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *ins + + if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) + return NULL; +- list_add_tail(instrs, &iff->entry); ++ hlsl_block_add_instr(instrs, iff); + + if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) + return NULL; +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(instrs, &load->node); + + return &load->node; + } + +-static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { +- struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; ++ struct hlsl_ir_node *arg1, *arg2, *xor, 
*and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; + struct hlsl_type *type = instr->data_type, *utype; + struct hlsl_constant_value high_bit_value; + struct hlsl_ir_expr *expr; +@@ -2322,56 +2485,52 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + + if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) + return false; +- list_add_before(&instr->entry, &xor->entry); ++ hlsl_block_add_instr(block, xor); + + for (i = 0; i < type->dimx; ++i) + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; +- list_add_before(&instr->entry, &high_bit->entry); ++ hlsl_block_add_instr(block, high_bit); + + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, high_bit))) + return false; +- list_add_before(&instr->entry, &and->entry); ++ hlsl_block_add_instr(block, and); + + if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) + return false; +- list_add_before(&instr->entry, &abs1->entry); ++ hlsl_block_add_instr(block, abs1); + + if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast1->entry); ++ hlsl_block_add_instr(block, cast1); + + if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) + return false; +- list_add_before(&instr->entry, &abs2->entry); ++ hlsl_block_add_instr(block, abs2); + + if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast2->entry); ++ hlsl_block_add_instr(block, cast2); + + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, cast1, cast2))) + return false; +- list_add_before(&instr->entry, &div->entry); ++ hlsl_block_add_instr(block, div); + + if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast3->entry); ++ hlsl_block_add_instr(block, cast3); + + if (!(neg = 
hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) + return false; +- list_add_before(&instr->entry, &neg->entry); ++ hlsl_block_add_instr(block, neg); + +- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) +- return false; +- hlsl_replace_node(instr, cond); +- +- return true; ++ return hlsl_add_conditional(ctx, block, and, neg, cast3); + } + +-static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { +- struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; ++ struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; + struct hlsl_type *type = instr->data_type, *utype; + struct hlsl_constant_value high_bit_value; + struct hlsl_ir_expr *expr; +@@ -2394,45 +2553,41 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; +- list_add_before(&instr->entry, &high_bit->entry); ++ hlsl_block_add_instr(block, high_bit); + + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, high_bit))) + return false; +- list_add_before(&instr->entry, &and->entry); ++ hlsl_block_add_instr(block, and); + + if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) + return false; +- list_add_before(&instr->entry, &abs1->entry); ++ hlsl_block_add_instr(block, abs1); + + if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast1->entry); ++ hlsl_block_add_instr(block, cast1); + + if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) + return false; +- list_add_before(&instr->entry, &abs2->entry); ++ hlsl_block_add_instr(block, abs2); + + if (!(cast2 = 
hlsl_new_cast(ctx, abs2, utype, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast2->entry); ++ hlsl_block_add_instr(block, cast2); + + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, cast1, cast2))) + return false; +- list_add_before(&instr->entry, &div->entry); ++ hlsl_block_add_instr(block, div); + + if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast3->entry); ++ hlsl_block_add_instr(block, cast3); + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) + return false; +- list_add_before(&instr->entry, &neg->entry); ++ hlsl_block_add_instr(block, neg); + +- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) +- return false; +- hlsl_replace_node(instr, cond); +- +- return true; ++ return hlsl_add_conditional(ctx, block, and, neg, cast3); + } + + static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -2516,9 +2671,9 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void + return false; + } + +-static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { +- struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one; ++ struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one, *mul3; + struct hlsl_type *type = instr->data_type, *btype; + struct hlsl_constant_value one_value; + struct hlsl_ir_expr *expr; +@@ -2539,47 +2694,100 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + + if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) + return false; +- list_add_before(&instr->entry, &mul1->entry); ++ hlsl_block_add_instr(block, mul1); + + if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) + 
return false; +- list_add_before(&instr->entry, &neg1->entry); ++ hlsl_block_add_instr(block, neg1); + + if (!(ge = hlsl_new_binary_expr(ctx, HLSL_OP2_GEQUAL, mul1, neg1))) + return false; + ge->data_type = btype; +- list_add_before(&instr->entry, &ge->entry); ++ hlsl_block_add_instr(block, ge); + + if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) + return false; +- list_add_before(&instr->entry, &neg2->entry); ++ hlsl_block_add_instr(block, neg2); + +- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, ge, arg2, neg2))) ++ if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) + return false; + + for (i = 0; i < type->dimx; ++i) + one_value.u[i].f = 1.0f; + if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) + return false; +- list_add_before(&instr->entry, &one->entry); ++ hlsl_block_add_instr(block, one); + + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, one, cond))) + return false; +- list_add_before(&instr->entry, &div->entry); ++ hlsl_block_add_instr(block, div); + + if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, div, arg1))) + return false; +- list_add_before(&instr->entry, &mul2->entry); ++ hlsl_block_add_instr(block, mul2); + + if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, &instr->loc))) + return false; +- list_add_before(&instr->entry, &frc->entry); ++ hlsl_block_add_instr(block, frc); + +- expr->op = HLSL_OP2_MUL; +- hlsl_src_remove(&expr->operands[0]); +- hlsl_src_remove(&expr->operands[1]); +- hlsl_src_from_node(&expr->operands[0], frc); +- hlsl_src_from_node(&expr->operands[1], cond); ++ if (!(mul3 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, frc, cond))) ++ return false; ++ hlsl_block_add_instr(block, mul3); ++ ++ return true; ++} ++ ++static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; ++ static const struct hlsl_constant_value zero_value; ++ struct hlsl_type *arg_type, 
*cmp_type; ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; ++ struct hlsl_ir_jump *jump; ++ struct hlsl_block block; ++ unsigned int i, count; ++ ++ if (instr->type != HLSL_IR_JUMP) ++ return false; ++ jump = hlsl_ir_jump(instr); ++ if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) ++ return false; ++ ++ hlsl_block_init(&block); ++ ++ arg_type = jump->condition.node->data_type; ++ if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(&block, zero); ++ ++ operands[0] = jump->condition.node; ++ operands[1] = zero; ++ cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); ++ if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(&block, cmp); ++ ++ if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(&block, bool_false); ++ ++ or = bool_false; ++ ++ count = hlsl_type_component_count(cmp_type); ++ for (i = 0; i < count; ++i) ++ { ++ if (!(load = hlsl_add_load_component(ctx, &block, cmp, i, &instr->loc))) ++ return false; ++ ++ if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) ++ return NULL; ++ hlsl_block_add_instr(&block, or); ++ } ++ ++ list_move_tail(&instr->entry, &block.instrs); ++ hlsl_src_remove(&jump->condition); ++ hlsl_src_from_node(&jump->condition, or); ++ jump->type = HLSL_IR_JUMP_DISCARD_NZ; + + return true; + } +@@ -2698,7 +2906,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + continue; + regset = hlsl_type_get_regset(var->data_type); + +- if (var->reg_reservation.reg_type && var->regs[regset].bind_count) ++ if (var->reg_reservation.reg_type && var->regs[regset].allocation_size) + { + if (var->reg_reservation.reg_type != get_regset_name(regset)) + { +@@ -2716,7 +2924,7 @@ static void allocate_register_reservations(struct hlsl_ctx 
*ctx) + var->regs[regset].id = var->reg_reservation.reg_index; + TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, + var->reg_reservation.reg_index, var->reg_reservation.reg_type, +- var->reg_reservation.reg_index + var->regs[regset].bind_count); ++ var->reg_reservation.reg_index + var->regs[regset].allocation_size); + } + } + } +@@ -2806,7 +3014,8 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + load->sampler.offset.node->last_read = last_read; + } + +- load->coords.node->last_read = last_read; ++ if (load->coords.node) ++ load->coords.node->last_read = last_read; + if (load->texel_offset.node) + load->texel_offset.node->last_read = last_read; + if (load->lod.node) +@@ -2848,8 +3057,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + index->idx.node->last_read = last_read; + break; + } +- case HLSL_IR_CONSTANT: + case HLSL_IR_JUMP: ++ { ++ struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); ++ ++ if (jump->condition.node) ++ jump->condition.node->last_read = last_read; ++ break; ++ } ++ case HLSL_IR_CONSTANT: + break; + } + } +@@ -2966,7 +3182,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); + + ret.id = reg_idx; +- ret.bind_count = 1; ++ ret.allocation_size = 1; + ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); + ret.allocated = true; + return ret; +@@ -3002,7 +3218,7 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo + record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); + + ret.id = reg_idx; +- ret.bind_count = align(reg_size, 4) / 4; ++ ret.allocation_size = align(reg_size, 4) / 4; + ret.allocated = true; + return ret; + } +@@ -3034,7 +3250,7 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct + 
return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); + } + +-static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; +@@ -3046,15 +3262,16 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; ++ + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); ++ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) ++ return false; + + if (regset == HLSL_REGSET_SAMPLERS) + { + enum hlsl_sampler_dim dim; + + assert(!load->sampler.var); +- if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) +- return false; + + dim = var->objects_usage[regset][index].sampler_dim; + if (dim != load->sampling_dim) +@@ -3072,25 +3289,39 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n + return false; + } + } +- var->objects_usage[regset][index].used = true; +- var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + } +- else +- { +- if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) +- return false; ++ var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + +- var->objects_usage[regset][index].used = true; +- var->objects_usage[regset][index].sampler_dim = load->sampling_dim; ++ return false; ++} + +- if (load->sampler.var) +- { +- var = load->sampler.var; +- if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) +- return false; ++static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_resource_load *load; ++ struct hlsl_ir_var *var; ++ enum hlsl_regset regset; ++ unsigned int index; + 
+- var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; +- } ++ if (instr->type != HLSL_IR_RESOURCE_LOAD) ++ return false; ++ ++ load = hlsl_ir_resource_load(instr); ++ var = load->resource.var; ++ ++ regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); ++ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) ++ return false; ++ ++ var->objects_usage[regset][index].used = true; ++ var->bind_count[regset] = max(var->bind_count[regset], index + 1); ++ if (load->sampler.var) ++ { ++ var = load->sampler.var; ++ if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) ++ return false; ++ ++ var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; ++ var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); + } + + return false; +@@ -3100,7 +3331,7 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) + { + struct hlsl_ir_var *var; + struct hlsl_type *type; +- unsigned int i, k; ++ unsigned int k; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +@@ -3108,12 +3339,10 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { +- for (i = 0; i < type->reg_size[k]; ++i) +- { +- /* Samplers are only allocated until the last used one. */ +- if (var->objects_usage[k][i].used) +- var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; +- } ++ bool is_separated = var->is_separated_resource; ++ ++ if (var->bind_count[k] > 0) ++ var->regs[k].allocation_size = (k == HLSL_REGSET_SAMPLERS || is_separated) ? 
var->bind_count[k] : type->reg_size[k]; + } + } + } +@@ -3192,10 +3421,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, + } + } + ++static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f) ++{ ++ struct hlsl_constant_defs *defs = &ctx->constant_defs; ++ struct hlsl_constant_register *reg; ++ size_t i; ++ ++ for (i = 0; i < defs->count; ++i) ++ { ++ reg = &defs->regs[i]; ++ if (reg->index == (component_index / 4)) ++ { ++ reg->value.f[component_index % 4] = f; ++ return; ++ } ++ } ++ ++ if (!hlsl_array_reserve(ctx, (void **)&defs->regs, &defs->size, defs->count + 1, sizeof(*defs->regs))) ++ return; ++ reg = &defs->regs[defs->count++]; ++ memset(reg, 0, sizeof(*reg)); ++ reg->index = component_index / 4; ++ reg->value.f[component_index % 4] = f; ++} ++ + static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + struct hlsl_block *block, struct register_allocator *allocator) + { +- struct hlsl_constant_defs *defs = &ctx->constant_defs; + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) +@@ -3206,66 +3458,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + { + struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + const struct hlsl_type *type = instr->data_type; +- unsigned int x, y, i, writemask, end_reg; +- unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; ++ unsigned int x, i; + + constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); + TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); + +- if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, +- constant->reg.id + reg_size / 4, sizeof(*defs->values))) +- return; +- end_reg = constant->reg.id + reg_size / 4; +- if (end_reg > defs->count) +- { +- memset(&defs->values[defs->count], 0, sizeof(*defs->values) * (end_reg - defs->count)); +- defs->count = end_reg; +- } +- 
+ assert(type->class <= HLSL_CLASS_LAST_NUMERIC); ++ assert(type->dimy == 1); ++ assert(constant->reg.writemask); + +- if (!(writemask = constant->reg.writemask)) +- writemask = (1u << type->dimx) - 1; +- +- for (y = 0; y < type->dimy; ++y) ++ for (x = 0, i = 0; x < 4; ++x) + { +- for (x = 0, i = 0; x < 4; ++x) ++ const union hlsl_constant_value_component *value; ++ float f; ++ ++ if (!(constant->reg.writemask & (1u << x))) ++ continue; ++ value = &constant->value.u[i++]; ++ ++ switch (type->base_type) + { +- const union hlsl_constant_value_component *value; +- float f; +- +- if (!(writemask & (1u << x))) +- continue; +- value = &constant->value.u[i++]; +- +- switch (type->base_type) +- { +- case HLSL_TYPE_BOOL: +- f = !!value->u; +- break; +- +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- f = value->f; +- break; +- +- case HLSL_TYPE_INT: +- f = value->i; +- break; +- +- case HLSL_TYPE_UINT: +- f = value->u; +- break; +- +- case HLSL_TYPE_DOUBLE: +- FIXME("Double constant.\n"); +- return; +- +- default: +- vkd3d_unreachable(); +- } +- defs->values[constant->reg.id + y].f[x] = f; ++ case HLSL_TYPE_BOOL: ++ f = !!value->u; ++ break; ++ ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ f = value->f; ++ break; ++ ++ case HLSL_TYPE_INT: ++ f = value->i; ++ break; ++ ++ case HLSL_TYPE_UINT: ++ f = value->u; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ FIXME("Double constant.\n"); ++ return; ++ ++ default: ++ vkd3d_unreachable(); + } ++ ++ record_constant(ctx, constant->reg.id * 4 + x, f); + } + + break; +@@ -3297,8 +3535,6 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + struct register_allocator allocator = {0}; + struct hlsl_ir_var *var; + +- allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); +- + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->last_read) +@@ -3315,6 +3551,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct 
hlsl_ir_functi + } + } + ++ allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); ++ + vkd3d_free(allocator.allocations); + } + +@@ -3410,7 +3648,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + { + var->regs[HLSL_REGSET_NUMERIC].allocated = true; + var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; +- var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; ++ var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; + var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; + TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', + var->regs[HLSL_REGSET_NUMERIC], var->data_type)); +@@ -3497,7 +3735,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) + + LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) ++ if (!var1->is_uniform || hlsl_type_is_resource(var1->data_type)) + continue; + + buffer = var1->buffer; +@@ -3508,7 +3746,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) + { + unsigned int var1_reg_size, var2_reg_size; + +- if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) ++ if (!var2->is_uniform || hlsl_type_is_resource(var2->data_type)) + continue; + + if (var1 == var2 || var1->buffer != var2->buffer) +@@ -3558,7 +3796,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) ++ if (var->is_uniform && !hlsl_type_is_resource(var->data_type)) + { + if (var->is_param) + var->buffer = ctx->params_buffer; +@@ -3589,7 +3827,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + } + + buffer->reg.id = buffer->reservation.reg_index; +- buffer->reg.bind_count = 1; ++ buffer->reg.allocation_size = 1; + buffer->reg.allocated = true; + TRACE("Allocated reserved %s to cb%u.\n", buffer->name, 
index); + } +@@ -3599,7 +3837,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + ++index; + + buffer->reg.id = index; +- buffer->reg.bind_count = 1; ++ buffer->reg.allocation_size = 1; + buffer->reg.allocated = true; + TRACE("Allocated %s to cb%u.\n", buffer->name, index); + ++index; +@@ -3618,7 +3856,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + } + + static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, +- uint32_t index) ++ uint32_t index, bool allocated_only) + { + const struct hlsl_ir_var *var; + unsigned int start, count; +@@ -3632,11 +3870,14 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum + * bound there even if the reserved vars aren't used. */ + start = var->reg_reservation.reg_index; + count = var->data_type->reg_size[regset]; ++ ++ if (!var->regs[regset].allocated && allocated_only) ++ continue; + } + else if (var->regs[regset].allocated) + { + start = var->regs[regset].id; +- count = var->regs[regset].bind_count; ++ count = var->regs[regset].allocation_size; + } + else + { +@@ -3667,11 +3908,12 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- unsigned int count = var->regs[regset].bind_count; ++ unsigned int count = var->regs[regset].allocation_size; + + if (count == 0) + continue; + ++ /* The variable was already allocated if it has a reservation. */ + if (var->regs[regset].allocated) + { + const struct hlsl_ir_var *reserved_object, *last_reported = NULL; +@@ -3690,7 +3932,10 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + { + index = var->regs[regset].id + i; + +- reserved_object = get_allocated_object(ctx, regset, index); ++ /* get_allocated_object() may return "var" itself, but we ++ * actually want that, otherwise we'll end up reporting the ++ * same conflict between the same two variables twice. 
*/ ++ reserved_object = get_allocated_object(ctx, regset, index, true); + if (reserved_object && reserved_object != var && reserved_object != last_reported) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, +@@ -3709,7 +3954,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + + while (available < count) + { +- if (get_allocated_object(ctx, regset, index)) ++ if (get_allocated_object(ctx, regset, index, false)) + available = 0; + else + ++available; +@@ -3853,6 +4098,7 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref + bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) + { + struct hlsl_ir_node *offset_node = deref->offset.node; ++ enum hlsl_regset regset; + unsigned int size; + + if (!offset_node) +@@ -3869,8 +4115,9 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref + return false; + + *offset = hlsl_ir_constant(offset_node)->value.u[0].u; ++ regset = hlsl_type_get_regset(deref->data_type); + +- size = deref->var->data_type->reg_size[deref->offset_regset]; ++ size = deref->var->data_type->reg_size[regset]; + if (*offset >= size) + { + hlsl_error(ctx, &deref->offset.node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +@@ -3900,7 +4147,8 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere + struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + +- assert(deref->offset_regset == HLSL_REGSET_NUMERIC); ++ assert(deref->data_type); ++ assert(deref->data_type->class <= HLSL_CLASS_LAST_NUMERIC); + + ret.id += offset / 4; + +@@ -4008,7 +4256,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + { + if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) +- prepend_uniform_copy(ctx, 
&body->instrs, var); ++ prepend_uniform_copy(ctx, body, var); + } + + for (i = 0; i < entry_func->parameters.count; ++i) +@@ -4017,7 +4265,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + + if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + { +- prepend_uniform_copy(ctx, &body->instrs, var); ++ prepend_uniform_copy(ctx, body, var); + } + else + { +@@ -4033,9 +4281,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + } + + if (var->storage_modifiers & HLSL_STORAGE_IN) +- prepend_input_var_copy(ctx, &body->instrs, var); ++ prepend_input_var_copy(ctx, body, var); + if (var->storage_modifiers & HLSL_STORAGE_OUT) +- append_output_var_copy(ctx, &body->instrs, var); ++ append_output_var_copy(ctx, body, var); + } + } + if (entry_func->return_var) +@@ -4044,7 +4292,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); + +- append_output_var_copy(ctx, &body->instrs, entry_func->return_var); ++ append_output_var_copy(ctx, body, entry_func->return_var); + } + + for (i = 0; i < entry_func->attr_count; ++i) +@@ -4062,6 +4310,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); + ++ if (profile->major_version >= 4) ++ { ++ hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); ++ } + hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); + while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); + do +@@ -4075,10 +4327,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); + 
hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); + hlsl_transform_ir(ctx, lower_int_dot, body, NULL); +- hlsl_transform_ir(ctx, lower_int_division, body, NULL); +- hlsl_transform_ir(ctx, lower_int_modulus, body, NULL); ++ lower_ir(ctx, lower_int_division, body); ++ lower_ir(ctx, lower_int_modulus, body); + hlsl_transform_ir(ctx, lower_int_abs, body, NULL); +- hlsl_transform_ir(ctx, lower_float_modulus, body, NULL); ++ lower_ir(ctx, lower_float_modulus, body); + hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); + do + { +@@ -4094,6 +4346,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); + hlsl_transform_ir(ctx, lower_int_dot, body, NULL); + ++ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); ++ hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); ++ if (profile->major_version >= 4) ++ hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); ++ hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); ++ sort_synthetic_separated_samplers_first(ctx); ++ + if (profile->major_version < 4) + { + hlsl_transform_ir(ctx, lower_division, body, NULL); +@@ -4107,9 +4366,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, lower_abs, body, NULL); + } + +- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); +- hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); +- + /* TODO: move forward, remove when no longer needed */ + transform_derefs(ctx, replace_deref_path_with_offset, body); + while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +index 301113c8477..41a72ab6c0d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +@@ -80,7 
+80,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + return false; + } + +- for (k = 0; k < 4; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (src->node.data_type->base_type) + { +@@ -152,6 +152,51 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + return true; + } + ++static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, ++ "Indefinite logarithm result."); ++ } ++ dst->u[k].f = log2f(src->value.u[k].f); ++ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, ++ "Infinities and NaNs are not allowed by the shader model."); ++ } ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ if (src->value.u[k].d < 0.0) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, ++ "Indefinite logarithm result."); ++ } ++ dst->u[k].d = log2(src->value.u[k].d); ++ break; ++ ++ default: ++ FIXME("Fold 'log2' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +@@ -160,7 +205,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + + assert(type == src->node.data_type->base_type); + +- for (k = 0; k < 4; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { 
+@@ -186,6 +231,96 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + return true; + } + ++static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (ctx->profile->major_version >= 4 && src->value.u[k].f == 0.0f) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, ++ "Floating point division by zero."); ++ } ++ dst->u[k].f = 1.0f / src->value.u[k].f; ++ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, ++ "Infinities and NaNs are not allowed by the shader model."); ++ } ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ if (src->value.u[k].d == 0.0) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, ++ "Floating point division by zero."); ++ } ++ dst->u[k].d = 1.0 / src->value.u[k].d; ++ break; ++ ++ default: ++ FIXME("Fold 'rcp' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, ++ "Imaginary square root 
result."); ++ } ++ dst->u[k].f = sqrtf(src->value.u[k].f); ++ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, ++ "Infinities and NaNs are not allowed by the shader model."); ++ } ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ if (src->value.u[k].d < 0.0) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, ++ "Imaginary square root result."); ++ } ++ dst->u[k].d = sqrt(src->value.u[k].d); ++ break; ++ ++ default: ++ FIXME("Fold 'sqrt' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +@@ -195,7 +330,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + +- for (k = 0; k < 4; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { +@@ -223,7 +358,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + return true; + } + +-static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { + enum hlsl_base_type type = dst_type->base_type; +@@ -232,65 +367,132 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + +- for (k = 0; k < 4; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- dst->u[k].f = 
src1->value.u[k].f * src2->value.u[k].f; ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; + break; + +- case HLSL_TYPE_DOUBLE: +- dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; ++ default: ++ FIXME("Fold bit/logic and for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; + break; + ++ default: ++ FIXME("Fold bit/logic or for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; ++ dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; + break; + + default: +- FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; + } + +-static bool fold_nequal(struct hlsl_ctx *ctx, 
struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { ++ enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); ++ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + +- for (k = 0; k < 4; ++k) ++ dst->u[0].f = 0.0f; ++ for (k = 0; k < src1->node.data_type->dimx; ++k) + { +- switch (src1->node.data_type->base_type) ++ switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +- dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; ++ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; + break; ++ default: ++ FIXME("Fold 'dot' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } + +- case HLSL_TYPE_DOUBLE: +- dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; +- break; ++ return true; ++} + +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- case HLSL_TYPE_BOOL: +- dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; +- break; ++static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; + ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); ++ assert(type == src3->node.data_type->base_type); ++ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); ++ assert(src3->node.data_type->dimx == 1); ++ ++ dst->u[0].f = src3->value.u[0].f; ++ for (k = 0; k < 
src1->node.data_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; ++ break; + default: +- vkd3d_unreachable(); ++ FIXME("Fold 'dp2add' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; + } +- +- dst->u[k].u *= ~0u; + } ++ + return true; + } + +@@ -363,45 +565,116 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + return true; + } + +-static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +- const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, +- const struct vkd3d_shader_location *loc) ++static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { +- switch (type) ++ switch (src1->node.data_type->base_type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f == src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d == src2->value.u[k].d; ++ break; ++ + case HLSL_TYPE_INT: +- if (src2->value.u[k].i == 0) +- { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); +- return false; +- } +- if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) +- dst->u[k].i = 0; +- else +- dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; ++ break; ++ ++ 
default: ++ vkd3d_unreachable(); ++ } ++ ++ dst->u[k].u *= ~0u; ++ } ++ return true; ++} ++ ++static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ unsigned int k; ++ ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (src1->node.data_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f >= src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d >= src2->value.u[k].d; ++ break; ++ ++ case HLSL_TYPE_INT: ++ dst->u[k].u = src1->value.u[k].i >= src2->value.u[k].i; + break; + + case HLSL_TYPE_UINT: +- if (src2->value.u[k].u == 0) +- { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); +- return false; +- } +- dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; + break; + + default: +- FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); +- return false; ++ vkd3d_unreachable(); + } ++ ++ dst->u[k].u *= ~0u; ++ } ++ return true; ++} ++ ++static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ unsigned int k; ++ ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (src1->node.data_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f < src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d < src2->value.u[k].d; 
++ break; ++ ++ case HLSL_TYPE_INT: ++ dst->u[k].u = src1->value.u[k].i < src2->value.u[k].i; ++ break; ++ ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ dst->u[k].u *= ~0u; + } + return true; + } +@@ -419,6 +692,15 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + { + switch (type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = fmaxf(src1->value.u[k].f, src2->value.u[k].f); ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].d = fmax(src1->value.u[k].d, src2->value.u[k].d); ++ break; ++ + case HLSL_TYPE_INT: + dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); + break; +@@ -448,6 +730,15 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + { + switch (type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = fminf(src1->value.u[k].f, src2->value.u[k].f); ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].d = fmin(src1->value.u[k].d, src2->value.u[k].d); ++ break; ++ + case HLSL_TYPE_INT: + dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); + break; +@@ -464,8 +755,9 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + return true; + } + +-static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +- const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, ++ const struct vkd3d_shader_location *loc) + { + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; +@@ -478,19 +770,35 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + switch (type) + { + case HLSL_TYPE_INT: ++ if 
(src2->value.u[k].i == 0) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); ++ return false; ++ } ++ if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) ++ dst->u[k].i = 0; ++ else ++ dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; ++ break; ++ + case HLSL_TYPE_UINT: +- dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; ++ if (src2->value.u[k].u == 0) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); ++ return false; ++ } ++ dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; + break; + + default: +- FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; + } + +-static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { + enum hlsl_base_type type = dst_type->base_type; +@@ -503,48 +811,67 @@ static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + { + switch (type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; ++ break; ++ + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; ++ dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; + break; + + default: +- FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; + } + +-static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type 
*dst_type, ++static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { +- switch (type) ++ switch (src1->node.data_type->base_type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; ++ break; ++ + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; + break; + + default: +- FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); +- return false; ++ vkd3d_unreachable(); + } ++ ++ dst->u[k].u *= ~0u; + } + return true; + } + + bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { +- struct hlsl_ir_constant *arg1, *arg2 = NULL; ++ struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; + struct hlsl_constant_value res = {0}; + struct hlsl_ir_node *res_node; + struct hlsl_ir_expr *expr; +@@ -572,6 +899,8 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + arg1 = hlsl_ir_constant(expr->operands[0].node); + if (expr->operands[1].node) + arg2 = hlsl_ir_constant(expr->operands[1].node); ++ if (expr->operands[2].node) ++ arg3 = hlsl_ir_constant(expr->operands[2].node); + + switch (expr->op) + { +@@ -583,28 +912,58 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + success = fold_cast(ctx, 
&res, instr->data_type, arg1); + break; + ++ case HLSL_OP1_LOG2: ++ success = fold_log2(ctx, &res, instr->data_type, arg1, &instr->loc); ++ break; ++ + case HLSL_OP1_NEG: + success = fold_neg(ctx, &res, instr->data_type, arg1); + break; + ++ case HLSL_OP1_RCP: ++ success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); ++ break; ++ ++ case HLSL_OP1_SQRT: ++ success = fold_sqrt(ctx, &res, instr->data_type, arg1, &instr->loc); ++ break; ++ + case HLSL_OP2_ADD: + success = fold_add(ctx, &res, instr->data_type, arg1, arg2); + break; + +- case HLSL_OP2_MUL: +- success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_BIT_AND: ++ case HLSL_OP2_LOGIC_AND: ++ success = fold_and(ctx, &res, instr->data_type, arg1, arg2); + break; + +- case HLSL_OP2_NEQUAL: +- success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_BIT_OR: ++ case HLSL_OP2_LOGIC_OR: ++ success = fold_or(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP2_BIT_XOR: ++ success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP2_DOT: ++ success = fold_dot(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_DIV: + success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); + break; + +- case HLSL_OP2_MOD: +- success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); ++ case HLSL_OP2_EQUAL: ++ success = fold_equal(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP2_GEQUAL: ++ success = fold_gequal(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP2_LESS: ++ success = fold_less(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_MAX: +@@ -615,16 +974,20 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + success = fold_min(ctx, &res, instr->data_type, arg1, arg2); + break; + +- case HLSL_OP2_BIT_XOR: +- success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); ++ 
case HLSL_OP2_MOD: ++ success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); + break; + +- case HLSL_OP2_BIT_AND: +- success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_MUL: ++ success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); + break; + +- case HLSL_OP2_BIT_OR: +- success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_NEQUAL: ++ success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP3_DP2ADD: ++ success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); + break; + + default: +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 9eefb82c226..705905f7888 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -85,6 +85,72 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i + shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); + } + ++static const struct vkd3d_shader_varying_map *find_varying_map( ++ const struct vkd3d_shader_next_stage_info *next_stage, unsigned int signature_idx) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < next_stage->varying_count; ++i) ++ { ++ if (next_stage->varying_map[i].output_signature_index == signature_idx) ++ return &next_stage->varying_map[i]; ++ } ++ ++ return NULL; ++} ++ ++static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, ++ const struct vkd3d_shader_compile_info *compile_info) ++{ ++ struct shader_signature *signature = &parser->shader_desc.output_signature; ++ const struct vkd3d_shader_next_stage_info *next_stage; ++ unsigned int i; ++ ++ if (!(next_stage = vkd3d_find_struct(compile_info->next, NEXT_STAGE_INFO))) ++ return VKD3D_OK; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ const struct vkd3d_shader_varying_map *map = find_varying_map(next_stage, i); ++ struct signature_element *e = 
&signature->elements[i]; ++ ++ if (map) ++ { ++ unsigned int input_mask = map->input_mask; ++ ++ e->target_location = map->input_register_index; ++ ++ /* It is illegal in Vulkan if the next shader uses the same varying ++ * location with a different mask. */ ++ if (input_mask && input_mask != e->mask) ++ { ++ vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Aborting due to not yet implemented feature: " ++ "Output mask %#x does not match input mask %#x.", ++ e->mask, input_mask); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ } ++ else ++ { ++ e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; ++ } ++ } ++ ++ for (i = 0; i < next_stage->varying_count; ++i) ++ { ++ if (next_stage->varying_map[i].output_signature_index >= signature->element_count) ++ { ++ vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Aborting due to not yet implemented feature: " ++ "The next stage consumes varyings not written by this stage."); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ + struct hull_flattener + { + struct vkd3d_shader_instruction_array instructions; +@@ -247,13 +313,13 @@ static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_s + reg->immconst_type = VKD3D_IMMCONST_SCALAR; + } + +-static void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) ++void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) + { + memset(ins, 0, sizeof(*ins)); + ins->handler_idx = handler_idx; + } + +-enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) ++static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) + { + struct hull_flattener flattener = {*src_instructions}; + struct vkd3d_shader_instruction_array *instructions; +@@ -388,7 +454,7 @@ static 
enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p + return VKD3D_OK; + } + +-enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( ++static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( + struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) + { + struct vkd3d_shader_instruction_array *instructions; +@@ -999,7 +1065,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + shader_instruction_init(ins, VKD3DSIH_NOP); + } + +-enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, ++static enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, + enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, + struct shader_signature *output_signature, struct shader_signature *patch_constant_signature) + { +@@ -1070,3 +1136,159 @@ enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_i + *instructions = normaliser.instructions; + return VKD3D_OK; + } ++ ++struct flat_constant_def ++{ ++ enum vkd3d_shader_d3dbc_constant_register set; ++ uint32_t index; ++ uint32_t value[4]; ++}; ++ ++struct flat_constants_normaliser ++{ ++ struct vkd3d_shader_parser *parser; ++ struct flat_constant_def *defs; ++ size_t def_count, defs_capacity; ++}; ++ ++static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, ++ enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index) ++{ ++ static const struct ++ { ++ enum vkd3d_shader_register_type type; ++ enum vkd3d_shader_d3dbc_constant_register set; ++ uint32_t offset; ++ } ++ regs[] = ++ { ++ {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0}, ++ {VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048}, ++ {VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096}, ++ 
{VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144}, ++ {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0}, ++ {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0}, ++ }; ++ ++ unsigned int i; ++ ++ for (i = 0; i < ARRAY_SIZE(regs); ++i) ++ { ++ if (reg->type == regs[i].type) ++ { ++ if (reg->idx[0].rel_addr) ++ { ++ FIXME("Unhandled relative address.\n"); ++ return false; ++ } ++ ++ *set = regs[i].set; ++ *index = regs[i].offset + reg->idx[0].offset; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_param *param, ++ const struct flat_constants_normaliser *normaliser) ++{ ++ enum vkd3d_shader_d3dbc_constant_register set; ++ uint32_t index; ++ size_t i, j; ++ ++ if (!get_flat_constant_register_type(¶m->reg, &set, &index)) ++ return; ++ ++ for (i = 0; i < normaliser->def_count; ++i) ++ { ++ if (normaliser->defs[i].set == set && normaliser->defs[i].index == index) ++ { ++ param->reg.type = VKD3DSPR_IMMCONST; ++ param->reg.idx_count = 0; ++ param->reg.immconst_type = VKD3D_IMMCONST_VEC4; ++ for (j = 0; j < 4; ++j) ++ param->reg.u.immconst_uint[j] = normaliser->defs[i].value[j]; ++ return; ++ } ++ } ++ ++ param->reg.type = VKD3DSPR_CONSTBUFFER; ++ param->reg.idx[0].offset = set; /* register ID */ ++ param->reg.idx[1].offset = set; /* register index */ ++ param->reg.idx[2].offset = index; /* buffer index */ ++ param->reg.idx_count = 3; ++} ++ ++static enum vkd3d_result instruction_array_normalise_flat_constants(struct vkd3d_shader_parser *parser) ++{ ++ struct flat_constants_normaliser normaliser = {.parser = parser}; ++ unsigned int i, j; ++ ++ for (i = 0; i < parser->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i]; ++ ++ if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) ++ { ++ struct flat_constant_def *def; ++ ++ if 
(!vkd3d_array_reserve((void **)&normaliser.defs, &normaliser.defs_capacity, ++ normaliser.def_count + 1, sizeof(*normaliser.defs))) ++ { ++ vkd3d_free(normaliser.defs); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ def = &normaliser.defs[normaliser.def_count++]; ++ ++ get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index); ++ for (j = 0; j < 4; ++j) ++ def->value[j] = ins->src[0].reg.u.immconst_uint[j]; ++ ++ vkd3d_shader_instruction_make_nop(ins); ++ } ++ else ++ { ++ for (j = 0; j < ins->src_count; ++j) ++ shader_register_normalise_flat_constants((struct vkd3d_shader_src_param *)&ins->src[j], &normaliser); ++ } ++ } ++ ++ vkd3d_free(normaliser.defs); ++ return VKD3D_OK; ++} ++ ++enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, ++ const struct vkd3d_shader_compile_info *compile_info) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &parser->instructions; ++ enum vkd3d_result result = VKD3D_OK; ++ ++ if (parser->shader_desc.is_dxil) ++ return result; ++ ++ if (parser->shader_version.type != VKD3D_SHADER_TYPE_PIXEL ++ && (result = remap_output_signature(parser, compile_info)) < 0) ++ return result; ++ ++ if (parser->shader_version.type == VKD3D_SHADER_TYPE_HULL ++ && (result = instruction_array_flatten_hull_shader_phases(instructions)) >= 0) ++ { ++ result = instruction_array_normalise_hull_shader_control_point_io(instructions, ++ &parser->shader_desc.input_signature); ++ } ++ if (result >= 0) ++ result = instruction_array_normalise_io_registers(instructions, parser->shader_version.type, ++ &parser->shader_desc.input_signature, &parser->shader_desc.output_signature, ++ &parser->shader_desc.patch_constant_signature); ++ ++ if (result >= 0) ++ result = instruction_array_normalise_flat_constants(parser); ++ ++ if (result >= 0 && TRACE_ON()) ++ vkd3d_shader_trace(instructions, &parser->shader_version); ++ ++ return result; ++} +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l 
b/libs/vkd3d/libs/vkd3d-shader/preproc.l +index 94079696280..6fb61eff6c3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.l ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l +@@ -30,6 +30,13 @@ + + #define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner) + ++static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) ++{ ++ if (!ctx->expansion_count) ++ return NULL; ++ return ctx->expansion_stack[ctx->expansion_count - 1].macro; ++} ++ + static void update_location(struct preproc_ctx *ctx); + + #define YY_USER_ACTION update_location(yyget_extra(yyscanner)); +@@ -125,7 +132,20 @@ INT_SUFFIX [uUlL]{0,2} + const char *p; + + if (!ctx->last_was_newline) +- return T_HASHSTRING; ++ { ++ struct preproc_macro *macro; ++ ++ /* Stringification is only done for function-like macro bodies. ++ * Anywhere else, we need to parse it as two separate tokens. ++ * We could use a state for this, but yyless() is easier and cheap. ++ */ ++ ++ if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count) ++ return T_HASHSTRING; ++ ++ yyless(1); ++ return T_TEXT; ++ } + + for (p = yytext + 1; strchr(" \t", *p); ++p) + ; +@@ -219,13 +239,6 @@ static bool preproc_is_writing(struct preproc_ctx *ctx) + return file->if_stack[file->if_count - 1].current_true; + } + +-static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) +-{ +- if (!ctx->expansion_count) +- return NULL; +- return ctx->expansion_stack[ctx->expansion_count - 1].macro; +-} +- + /* Concatenation is not done for object-like macros, but is done for both + * function-like macro bodies and their arguments. 
*/ + static bool should_concat(struct preproc_ctx *ctx) +@@ -334,6 +347,43 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx, + return true; + } + ++static void preproc_stringify(struct preproc_ctx *ctx, struct vkd3d_string_buffer *buffer, const char *text) ++{ ++ const struct preproc_text *expansion; ++ const char *p = text + 1; ++ unsigned int i; ++ ++ while (*p == ' ' || *p == '\t') ++ ++p; ++ ++ vkd3d_string_buffer_printf(buffer, "\""); ++ if ((expansion = find_arg_expansion(ctx, p))) ++ { ++ size_t len = expansion->text.content_size; ++ size_t start = 0; ++ ++ while (len && strchr(" \t\r\n", expansion->text.buffer[len - 1])) ++ --len; ++ ++ while (start < len && strchr(" \t\r\n", expansion->text.buffer[start])) ++ ++start; ++ ++ for (i = start; i < len; ++i) ++ { ++ char c = expansion->text.buffer[i]; ++ ++ if (c == '\\' || c == '"') ++ vkd3d_string_buffer_printf(buffer, "\\"); ++ vkd3d_string_buffer_printf(buffer, "%c", c); ++ } ++ } ++ else ++ { ++ vkd3d_string_buffer_printf(buffer, "%s", p); ++ } ++ vkd3d_string_buffer_printf(buffer, "\""); ++} ++ + int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + { + struct preproc_ctx *ctx = yyget_extra(scanner); +@@ -441,9 +491,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + switch (func_state->state) + { + case STATE_NONE: +- { +- struct preproc_macro *macro; +- + if (token == T_CONCAT && should_concat(ctx)) + { + while (ctx->buffer.content_size +@@ -452,37 +499,17 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + break; + } + +- /* Stringification, however, is only done for function-like +- * macro bodies. 
*/ +- if (token == T_HASHSTRING && (macro = preproc_get_top_macro(ctx)) && macro->arg_count) ++ if (token == T_HASHSTRING) + { +- const struct preproc_text *expansion; +- const char *p = text + 1; +- unsigned int i; ++ struct vkd3d_string_buffer buffer; + + if (ctx->current_directive) + return return_token(token, lval, text); + +- while (*p == ' ' || *p == '\t') +- ++p; +- +- vkd3d_string_buffer_printf(&ctx->buffer, "\""); +- if ((expansion = find_arg_expansion(ctx, p))) +- { +- for (i = 0; i < expansion->text.content_size; ++i) +- { +- char c = expansion->text.buffer[i]; +- +- if (c == '\\' || c == '"') +- vkd3d_string_buffer_printf(&ctx->buffer, "\\"); +- vkd3d_string_buffer_printf(&ctx->buffer, "%c", c); +- } +- } +- else +- { +- vkd3d_string_buffer_printf(&ctx->buffer, "%s", p); +- } +- vkd3d_string_buffer_printf(&ctx->buffer, "\""); ++ vkd3d_string_buffer_init(&buffer); ++ preproc_stringify(ctx, &buffer, text); ++ vkd3d_string_buffer_printf(&ctx->buffer, "%s", buffer.buffer); ++ vkd3d_string_buffer_cleanup(&buffer); + break; + } + +@@ -586,7 +613,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + else + vkd3d_string_buffer_printf(&ctx->buffer, "%s ", text); + break; +- } + + case STATE_IDENTIFIER: + if (token == '(') +@@ -628,6 +654,41 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + + switch (token) + { ++ /* Most text gets left alone (e.g. if it contains macros, ++ * the macros should be evaluated later). ++ * Arguments are a special case, and are replaced with ++ * their values immediately. */ ++ case T_IDENTIFIER: ++ case T_IDENTIFIER_PAREN: ++ { ++ const struct preproc_text *expansion; ++ ++ if ((expansion = find_arg_expansion(ctx, text))) ++ { ++ preproc_push_expansion(ctx, expansion, NULL); ++ continue; ++ } ++ ++ if (current_arg) ++ preproc_text_add(current_arg, text); ++ break; ++ } ++ ++ /* Stringification is another special case. Unsurprisingly, ++ * we need to stringify if this is an argument. 
More ++ * surprisingly, we need to stringify even if it's not. */ ++ case T_HASHSTRING: ++ { ++ struct vkd3d_string_buffer buffer; ++ ++ vkd3d_string_buffer_init(&buffer); ++ preproc_stringify(ctx, &buffer, text); ++ if (current_arg) ++ preproc_text_add(current_arg, buffer.buffer); ++ vkd3d_string_buffer_cleanup(&buffer); ++ break; ++ } ++ + case T_NEWLINE: + if (current_arg) + preproc_text_add(current_arg, " "); +@@ -686,6 +747,9 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + if (current_arg) + preproc_text_add(current_arg, text); + } ++ ++ if (current_arg) ++ preproc_text_add(current_arg, " "); + break; + } + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 3542b5fac51..fa605f185ae 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -199,6 +199,21 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d + } + } + ++static inline bool register_is_undef(const struct vkd3d_shader_register *reg) ++{ ++ return reg->type == VKD3DSPR_UNDEF; ++} ++ ++static inline bool register_is_constant(const struct vkd3d_shader_register *reg) ++{ ++ return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); ++} ++ ++static inline bool register_is_constant_or_undef(const struct vkd3d_shader_register *reg) ++{ ++ return register_is_constant(reg) || register_is_undef(reg); ++} ++ + #define VKD3D_SPIRV_VERSION 0x00010000 + #define VKD3D_SPIRV_GENERATOR_ID 18 + #define VKD3D_SPIRV_GENERATOR_VERSION 8 +@@ -1746,6 +1761,38 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, + } + } + ++static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder *builder, ++ enum vkd3d_data_type data_type, unsigned int component_count) ++{ ++ uint32_t scalar_id; ++ ++ if (component_count == 1) ++ { ++ switch (data_type) ++ { ++ case VKD3D_DATA_FLOAT: ++ case VKD3D_DATA_SNORM: ++ case VKD3D_DATA_UNORM: ++ 
return vkd3d_spirv_get_op_type_float(builder, 32); ++ break; ++ case VKD3D_DATA_INT: ++ case VKD3D_DATA_UINT: ++ return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); ++ break; ++ case VKD3D_DATA_DOUBLE: ++ return vkd3d_spirv_get_op_type_float(builder, 64); ++ default: ++ FIXME("Unhandled data type %#x.\n", data_type); ++ return 0; ++ } ++ } ++ else ++ { ++ scalar_id = vkd3d_spirv_get_type_id_for_data_type(builder, data_type, 1); ++ return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); ++ } ++} ++ + static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) + { + vkd3d_spirv_stream_init(&builder->debug_stream); +@@ -2263,7 +2310,7 @@ struct spirv_compiler + + uint32_t binding_idx; + +- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; + unsigned int input_control_point_count; + unsigned int output_control_point_count; + bool use_vocp; +@@ -2333,7 +2380,7 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) + + static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, + struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, +- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) + { + const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; +@@ -2429,13 +2476,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve + + compiler->shader_type = shader_version->type; + +- compiler->input_signature = shader_desc->input_signature; +- compiler->output_signature = shader_desc->output_signature; +- compiler->patch_constant_signature = 
shader_desc->patch_constant_signature; +- memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); +- memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); +- memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); +- + if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) + { + compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); +@@ -2536,13 +2576,13 @@ static bool spirv_compiler_check_shader_visibility(const struct spirv_compiler * + } + + static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_constant_buffer( +- const struct spirv_compiler *compiler, const struct vkd3d_shader_constant_buffer *cb) ++ const struct spirv_compiler *compiler, const struct vkd3d_shader_register_range *range) + { +- unsigned int register_space = cb->range.space; +- unsigned int reg_idx = cb->range.first; ++ unsigned int register_space = range->space; ++ unsigned int reg_idx = range->first; + unsigned int i; + +- if (cb->range.first != cb->range.last) ++ if (range->first != range->last) + return NULL; + + for (i = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) +@@ -3211,13 +3251,13 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil + struct vkd3d_symbol reg_symbol, *symbol; + struct rb_entry *entry; + +- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); ++ assert(!register_is_constant_or_undef(reg)); + + if (reg->type == VKD3DSPR_TEMP) + { + assert(reg->idx[0].offset < compiler->temp_count); + register_info->id = compiler->temp_id + reg->idx[0].offset; +- register_info->storage_class = SpvStorageClassFunction; ++ register_info->storage_class = SpvStorageClassPrivate; + register_info->descriptor_array = NULL; + register_info->member_idx = 0; + register_info->component_type = VKD3D_SHADER_COMPONENT_FLOAT; +@@ -3553,6 +3593,19 @@ static 
uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi + vkd3d_component_type_from_data_type(reg->data_type), component_count, values); + } + ++static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_register *reg, DWORD write_mask) ++{ ++ unsigned int component_count = vkd3d_write_mask_component_count(write_mask); ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ uint32_t type_id; ++ ++ assert(reg->type == VKD3DSPR_UNDEF); ++ ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); ++ return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); ++} ++ + static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask, + const struct vkd3d_shader_register_info *reg_info) +@@ -3563,7 +3616,7 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, + enum vkd3d_shader_component_type component_type; + unsigned int skipped_component_mask; + +- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); ++ assert(!register_is_constant_or_undef(reg)); + assert(vkd3d_write_mask_component_count(write_mask) == 1); + + component_idx = vkd3d_write_mask_get_component_idx(write_mask); +@@ -3615,6 +3668,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, + return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); + else if (reg->type == VKD3DSPR_IMMCONST64) + return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); ++ else if (reg->type == VKD3DSPR_UNDEF) ++ return spirv_compiler_emit_load_undef(compiler, reg, write_mask); + + component_count = vkd3d_write_mask_component_count(write_mask); + component_type = vkd3d_component_type_from_data_type(reg->data_type); +@@ -3827,7 +3882,7 @@ static void spirv_compiler_emit_store_reg(struct 
spirv_compiler *compiler, + unsigned int src_write_mask = write_mask; + uint32_t type_id; + +- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); ++ assert(!register_is_constant_or_undef(reg)); + + if (!spirv_compiler_get_register_info(compiler, reg, ®_info)) + return; +@@ -3998,6 +4053,11 @@ static void spirv_compiler_emit_interpolation_decorations(struct spirv_compiler + vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); + break; ++ case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: ++ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationNoPerspective, NULL, 0); ++ vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); ++ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); ++ break; + default: + FIXME("Unhandled interpolation mode %#x.\n", mode); + break; +@@ -4542,7 +4602,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, + } + else + { +- unsigned int location = signature_element->register_index; ++ unsigned int location = signature_element->target_location; + + input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, component_type, input_component_count, array_sizes, 2); +@@ -4918,9 +4978,15 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + + spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); + } ++ else if (signature_element->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) ++ { ++ storage_class = SpvStorageClassPrivate; ++ id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, ++ storage_class, component_type, output_component_count, array_sizes, 2); ++ } + else + { +- unsigned int location = signature_element->register_index; ++ unsigned int location = signature_element->target_location; + + if (is_patch_constant) + location += 
shader_signature_next_location(&compiler->output_signature); +@@ -4929,10 +4995,10 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + storage_class, component_type, output_component_count, array_sizes, 2); + vkd3d_spirv_add_iface_variable(builder, id); + +- if (is_dual_source_blending(compiler) && signature_element->register_index < 2) ++ if (is_dual_source_blending(compiler) && location < 2) + { + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); +- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, signature_element->register_index); ++ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, location); + } + else + { +@@ -5258,8 +5324,7 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler + WARN("Unhandled global flags %#x.\n", flags); + } + +-static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t count) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + size_t function_location; +@@ -5270,11 +5335,11 @@ static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, + vkd3d_spirv_begin_function_stream_insertion(builder, function_location); + + assert(!compiler->temp_count); +- compiler->temp_count = instruction->declaration.count; ++ compiler->temp_count = count; + for (i = 0; i < compiler->temp_count; ++i) + { +- id = spirv_compiler_emit_variable(compiler, &builder->function_stream, +- SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); ++ id = spirv_compiler_emit_variable(compiler, &builder->global_stream, ++ SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + if (!i) + compiler->temp_id = id; + assert(id == compiler->temp_id + i); +@@ -5473,28 +5538,24 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * 
+ return var_id; + } + +-static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static void spirv_compiler_emit_constant_buffer(struct spirv_compiler *compiler, unsigned int size, ++ const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_register *reg) + { +- const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; + const SpvStorageClass storage_class = SpvStorageClassUniform; +- const struct vkd3d_shader_register *reg = &cb->src.reg; + struct vkd3d_push_constant_buffer_binding *push_cb; + struct vkd3d_descriptor_variable_info var_info; + struct vkd3d_symbol reg_symbol; + +- assert(!(instruction->flags & ~VKD3DSI_INDEXED_DYNAMIC)); +- +- if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, cb))) ++ if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, range))) + { + /* Push constant buffers are handled in + * spirv_compiler_emit_push_constant_buffers(). 
+ */ +- unsigned int cb_size_in_bytes = cb->size * VKD3D_VEC4_SIZE * sizeof(uint32_t); ++ unsigned int cb_size_in_bytes = size * VKD3D_VEC4_SIZE * sizeof(uint32_t); + push_cb->reg = *reg; +- push_cb->size = cb->size; ++ push_cb->size = size; + if (cb_size_in_bytes > push_cb->pc.size) + { + WARN("Constant buffer size %u exceeds push constant size %u.\n", +@@ -5504,17 +5565,17 @@ static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compi + } + + vec4_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); +- length_id = spirv_compiler_get_constant_uint(compiler, cb->size); ++ length_id = spirv_compiler_get_constant_uint(compiler, size); + array_type_id = vkd3d_spirv_build_op_type_array(builder, vec4_id, length_id); + vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 16); + + struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1); + vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); + vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0); +- vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", cb->size); ++ vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); + + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, +- reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); ++ reg, range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); + + vkd3d_symbol_make_register(®_symbol, reg); + vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, +@@ -5524,6 +5585,16 @@ static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compi + spirv_compiler_put_symbol(compiler, ®_symbol); + } + ++static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; ++ ++ 
assert(!(instruction->flags & ~VKD3DSI_INDEXED_DYNAMIC)); ++ ++ spirv_compiler_emit_constant_buffer(compiler, cb->size, &cb->range, &cb->src.reg); ++} ++ + static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { +@@ -5624,13 +5695,13 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty + } + } + +-static const struct vkd3d_shader_descriptor_info *spirv_compiler_get_descriptor_info( ++static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( + struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, + const struct vkd3d_shader_register_range *range) + { +- const struct vkd3d_shader_scan_descriptor_info *descriptor_info = compiler->scan_descriptor_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; + unsigned int register_last = (range->last == ~0u) ? range->first : range->last; +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + unsigned int i; + + for (i = 0; i < descriptor_info->descriptor_count; ++i) +@@ -5650,7 +5721,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler + bool raw_structured, uint32_t depth) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + bool uav_read, uav_atomics; + uint32_t sampled_type_id; + SpvImageFormat format; +@@ -5685,7 +5756,7 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi + const struct vkd3d_shader_combined_resource_sampler *current; + uint32_t image_type_id, type_id, ptr_type_id, var_id; + enum vkd3d_shader_binding_flag resource_type_flag; +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + struct vkd3d_symbol symbol; + 
unsigned int i; + bool depth; +@@ -5818,7 +5889,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + + if (is_uav) + { +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + + d = spirv_compiler_get_descriptor_info(compiler, + VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &resource->range); +@@ -6236,9 +6307,6 @@ static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) + + vkd3d_spirv_build_op_function_end(builder); + +- compiler->temp_id = 0; +- compiler->temp_count = 0; +- + if (is_in_control_point_phase(compiler)) + { + if (compiler->epilogue_function_id) +@@ -6640,7 +6708,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, + uint32_t components[VKD3D_VEC4_SIZE]; + unsigned int i, component_count; + +- if (src->reg.type == VKD3DSPR_IMMCONST || src->reg.type == VKD3DSPR_IMMCONST64 || dst->modifiers || src->modifiers) ++ if (register_is_constant_or_undef(&src->reg) || dst->modifiers || src->modifiers) + goto general_implementation; + + spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); +@@ -9103,9 +9171,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DCL_GLOBAL_FLAGS: + spirv_compiler_emit_dcl_global_flags(compiler, instruction); + break; +- case VKD3DSIH_DCL_TEMPS: +- spirv_compiler_emit_dcl_temps(compiler, instruction); +- break; + case VKD3DSIH_DCL_INDEXABLE_TEMP: + spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); + break; +@@ -9426,6 +9491,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + spirv_compiler_emit_cut_stream(compiler, instruction); + break; + case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: ++ case VKD3DSIH_DCL_TEMPS: + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_NOP: + /* nothing to do */ +@@ -9437,6 +9503,26 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + return ret; + } + ++static void 
spirv_compiler_emit_sm1_constant_buffer(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_desc *desc, enum vkd3d_shader_d3dbc_constant_register set, ++ enum vkd3d_data_type data_type) ++{ ++ struct vkd3d_shader_register_range range = {.space = 0, .first = set, .last = set}; ++ uint32_t count = desc->flat_constant_count[set].external; ++ struct vkd3d_shader_register reg = ++ { ++ .type = VKD3DSPR_CONSTBUFFER, ++ .idx[0].offset = set, /* register ID */ ++ .idx[1].offset = set, /* register index */ ++ .idx[2].offset = count, /* size */ ++ .idx_count = 3, ++ .data_type = data_type, ++ }; ++ ++ if (count) ++ spirv_compiler_emit_constant_buffer(compiler, count, &range, ®); ++} ++ + static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, + struct vkd3d_shader_code *spirv) +@@ -9444,28 +9530,36 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; + struct vkd3d_shader_instruction_array instructions; + enum vkd3d_result result = VKD3D_OK; + unsigned int i; + ++ if (parser->shader_desc.temp_count) ++ spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); ++ ++ spirv_compiler_emit_sm1_constant_buffer(compiler, &parser->shader_desc, ++ VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, VKD3D_DATA_FLOAT); ++ spirv_compiler_emit_sm1_constant_buffer(compiler, &parser->shader_desc, ++ VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, VKD3D_DATA_INT); ++ spirv_compiler_emit_sm1_constant_buffer(compiler, &parser->shader_desc, ++ VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, VKD3D_DATA_UINT); ++ + compiler->location.column = 0; + compiler->location.line = 1; + ++ if 
((result = vkd3d_shader_normalise(parser, compile_info)) < 0) ++ return result; ++ + instructions = parser->instructions; + memset(&parser->instructions, 0, sizeof(parser->instructions)); + +- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL +- && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) +- { +- result = instruction_array_normalise_hull_shader_control_point_io(&instructions, +- &compiler->input_signature); +- } +- if (result >= 0) +- result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, +- &compiler->input_signature, &compiler->output_signature, &compiler->patch_constant_signature); +- +- if (result >= 0 && TRACE_ON()) +- vkd3d_shader_trace(&instructions, &parser->shader_version); ++ compiler->input_signature = shader_desc->input_signature; ++ compiler->output_signature = shader_desc->output_signature; ++ compiler->patch_constant_signature = shader_desc->patch_constant_signature; ++ memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); ++ memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); ++ memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); + + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) + spirv_compiler_emit_shader_signature_outputs(compiler); +@@ -9541,7 +9635,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + } + + int spirv_compile(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index d066b13ee4e..550f9b27cc7 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ 
b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -430,6 +430,8 @@ enum vkd3d_sm4_register_type + VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, + VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, + VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, ++ ++ VKD3D_SM4_REGISTER_TYPE_COUNT, + }; + + enum vkd3d_sm4_extended_operand_type +@@ -571,6 +573,12 @@ struct sm4_index_range_array + struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; + }; + ++struct vkd3d_sm4_lookup_tables ++{ ++ const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; ++ const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; ++}; ++ + struct vkd3d_shader_sm4_parser + { + const uint32_t *start, *end, *ptr; +@@ -587,6 +595,8 @@ struct vkd3d_shader_sm4_parser + struct sm4_index_range_array output_index_ranges; + struct sm4_index_range_array patch_constant_index_ranges; + ++ struct vkd3d_sm4_lookup_tables lookup; ++ + struct vkd3d_shader_parser p; + }; + +@@ -989,6 +999,8 @@ static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *i + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) + { + ins->declaration.count = *tokens; ++ if (opcode == VKD3D_SM4_OP_DCL_TEMPS) ++ priv->p.shader_desc.temp_count = max(priv->p.shader_desc.temp_count, *tokens); + } + + static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1466,50 +1478,10 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] = + {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, + }; + +-static const enum vkd3d_shader_register_type register_type_table[] = +-{ +- /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, +- /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, +- /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, +- /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, +- /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, +- /* VKD3D_SM4_RT_IMMCONST64 */ 
VKD3DSPR_IMMCONST64, +- /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, +- /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, +- /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, +- /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, +- /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, +- /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, +- /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, +- /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, +- /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, +- /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, +- /* UNKNOWN */ ~0u, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, +- /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, +- /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, +- /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, +- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, +- /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, +- /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, +- /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, +- /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, +- /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, +- /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, +- /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, +- /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, +- /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, +- /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, +- /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, +- /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, +- /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, ++struct vkd3d_sm4_register_type_info ++{ ++ enum vkd3d_sm4_register_type sm4_type; ++ enum 
vkd3d_shader_register_type vkd3d_type; + }; + + static const enum vkd3d_shader_register_precision register_precision_table[] = +@@ -1522,18 +1494,104 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = + /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, + }; + ++struct tpf_writer ++{ ++ struct hlsl_ctx *ctx; ++ struct vkd3d_bytecode_buffer *buffer; ++ struct vkd3d_sm4_lookup_tables lookup; ++}; ++ + static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) + { + unsigned int i; + + for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) + { +- if (opcode == opcode_table[i].opcode) return &opcode_table[i]; ++ if (opcode == opcode_table[i].opcode) ++ return &opcode_table[i]; + } + + return NULL; + } + ++static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) ++{ ++ const struct vkd3d_sm4_register_type_info *info; ++ unsigned int i; ++ ++ static const struct vkd3d_sm4_register_type_info register_type_table[] = ++ { ++ {VKD3D_SM4_RT_TEMP, VKD3DSPR_TEMP}, ++ {VKD3D_SM4_RT_INPUT, VKD3DSPR_INPUT}, ++ {VKD3D_SM4_RT_OUTPUT, VKD3DSPR_OUTPUT}, ++ {VKD3D_SM4_RT_INDEXABLE_TEMP, VKD3DSPR_IDXTEMP}, ++ {VKD3D_SM4_RT_IMMCONST, VKD3DSPR_IMMCONST}, ++ {VKD3D_SM4_RT_IMMCONST64, VKD3DSPR_IMMCONST64}, ++ {VKD3D_SM4_RT_SAMPLER, VKD3DSPR_SAMPLER}, ++ {VKD3D_SM4_RT_RESOURCE, VKD3DSPR_RESOURCE}, ++ {VKD3D_SM4_RT_CONSTBUFFER, VKD3DSPR_CONSTBUFFER}, ++ {VKD3D_SM4_RT_IMMCONSTBUFFER, VKD3DSPR_IMMCONSTBUFFER}, ++ {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID}, ++ {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT}, ++ {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL}, ++ {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER}, ++ {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK}, ++ {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM}, ++ {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY}, ++ {VKD3D_SM5_RT_FUNCTION_POINTER, VKD3DSPR_FUNCTIONPOINTER}, ++ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID, VKD3DSPR_OUTPOINTID}, ++ 
{VKD3D_SM5_RT_FORK_INSTANCE_ID, VKD3DSPR_FORKINSTID}, ++ {VKD3D_SM5_RT_JOIN_INSTANCE_ID, VKD3DSPR_JOININSTID}, ++ {VKD3D_SM5_RT_INPUT_CONTROL_POINT, VKD3DSPR_INCONTROLPOINT}, ++ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT, VKD3DSPR_OUTCONTROLPOINT}, ++ {VKD3D_SM5_RT_PATCH_CONSTANT_DATA, VKD3DSPR_PATCHCONST}, ++ {VKD3D_SM5_RT_DOMAIN_LOCATION, VKD3DSPR_TESSCOORD}, ++ {VKD3D_SM5_RT_UAV, VKD3DSPR_UAV}, ++ {VKD3D_SM5_RT_SHARED_MEMORY, VKD3DSPR_GROUPSHAREDMEM}, ++ {VKD3D_SM5_RT_THREAD_ID, VKD3DSPR_THREADID}, ++ {VKD3D_SM5_RT_THREAD_GROUP_ID, VKD3DSPR_THREADGROUPID}, ++ {VKD3D_SM5_RT_LOCAL_THREAD_ID, VKD3DSPR_LOCALTHREADID}, ++ {VKD3D_SM5_RT_COVERAGE, VKD3DSPR_COVERAGE}, ++ {VKD3D_SM5_RT_LOCAL_THREAD_INDEX, VKD3DSPR_LOCALTHREADINDEX}, ++ {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID}, ++ {VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL, VKD3DSPR_DEPTHOUTGE}, ++ {VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL, VKD3DSPR_DEPTHOUTLE}, ++ {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF}, ++ }; ++ ++ memset(lookup, 0, sizeof(*lookup)); ++ ++ for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) ++ { ++ info = ®ister_type_table[i]; ++ lookup->register_type_info_from_sm4[info->sm4_type] = info; ++ lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; ++ } ++} ++ ++static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++{ ++ tpf->ctx = ctx; ++ tpf->buffer = buffer; ++ init_sm4_lookup_tables(&tpf->lookup); ++} ++ ++static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( ++ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) ++{ ++ if (sm4_type >= VKD3D_SM4_REGISTER_TYPE_COUNT) ++ return NULL; ++ return lookup->register_type_info_from_sm4[sm4_type]; ++} ++ ++static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_type( ++ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) ++{ ++ if (vkd3d_type >= VKD3DSPR_COUNT) ++ 
return NULL; ++ return lookup->register_type_info_from_vkd3d[vkd3d_type]; ++} ++ + static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) + { + switch (sm4->p.shader_version.type) +@@ -1640,6 +1698,7 @@ static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_typ + static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, + enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) + { ++ const struct vkd3d_sm4_register_type_info *register_type_info; + enum vkd3d_sm4_register_precision precision; + enum vkd3d_sm4_register_type register_type; + enum vkd3d_sm4_extended_operand_type type; +@@ -1654,15 +1713,15 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui + token = *(*ptr)++; + + register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; +- if (register_type >= ARRAY_SIZE(register_type_table) +- || register_type_table[register_type] == VKD3DSPR_INVALID) ++ register_type_info = get_info_from_sm4_register_type(&priv->lookup, register_type); ++ if (!register_type_info) + { + FIXME("Unhandled register type %#x.\n", register_type); + param->type = VKD3DSPR_TEMP; + } + else + { +- param->type = register_type_table[register_type]; ++ param->type = register_type_info->vkd3d_type; + } + param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + param->non_uniform = false; +@@ -2362,6 +2421,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t + sm4->output_map[e->register_index] = e->semantic_index; + } + ++ init_sm4_lookup_tables(&sm4->lookup); ++ + return true; + } + +@@ -2442,6 +2503,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + } + + shader_desc = &sm4->p.shader_desc; ++ shader_desc->is_dxil = false; + if ((ret = 
shader_extract_from_dxbc(&compile_info->source, + message_context, compile_info->source_name, shader_desc)) < 0) + { +@@ -2499,7 +2561,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + return sm4->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; + } + +-static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); ++static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); + + static bool type_is_integer(const struct hlsl_type *type) + { +@@ -2516,7 +2578,7 @@ static bool type_is_integer(const struct hlsl_type *type) + } + + bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) ++ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) + { + unsigned int i; + +@@ -2526,24 +2588,24 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + bool output; + enum vkd3d_shader_type shader_type; + enum vkd3d_sm4_swizzle_type swizzle_type; +- enum vkd3d_sm4_register_type type; ++ enum vkd3d_shader_register_type type; + bool has_idx; + } + register_table[] = + { +- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, +- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, +- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, ++ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADID, false}, ++ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADGROUPID, false}, ++ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_LOCALTHREADID, 
false}, + +- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3DSPR_PRIMID, false}, + + /* Put sv_target in this table, instead of letting it fall through to + * default varying allocation, so that the register index matches the + * usage index. */ +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) +@@ -2552,7 +2614,8 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + && output == register_table[i].output + && ctx->profile->type == register_table[i].shader_type) + { +- *type = register_table[i].type; ++ if (type) ++ *type = register_table[i].type; + if (swizzle_type) + *swizzle_type = register_table[i].swizzle_type; + *has_idx = register_table[i].has_idx; +@@ -2624,7 +2687,8 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant + return true; + } + +-static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) ++static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, ++ uint32_t tag, struct vkd3d_bytecode_buffer *buffer) + { + /* Native 
D3DDisassemble() expects at least the sizes of the ISGN and OSGN + * sections to be aligned. Without this, the sections themselves will be +@@ -2632,6 +2696,9 @@ static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_byt + size_t size = bytecode_align(buffer); + + dxbc_writer_add_section(dxbc, tag, buffer->data, size); ++ ++ if (buffer->status < 0) ++ ctx->result = buffer->status; + } + + static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) +@@ -2649,7 +2716,6 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; +- enum vkd3d_sm4_register_type type; + uint32_t usage_idx, reg_idx; + D3D_NAME usage; + bool has_idx; +@@ -2663,14 +2729,13 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + continue; + usage_idx = var->semantic.index; + +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) ++ if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, NULL, &has_idx)) + { + reg_idx = has_idx ? var->semantic.index : ~0u; + } + else + { + assert(var->regs[HLSL_REGSET_NUMERIC].allocated); +- type = VKD3D_SM4_RT_INPUT; + reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; + } + +@@ -2739,7 +2804,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + + set_u32(&buffer, count_position, i); + +- add_section(dxbc, output ? TAG_OSGN : TAG_ISGN, &buffer); ++ add_section(ctx, dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); + } + + static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +@@ -2827,6 +2892,22 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) + return D3D_SVT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3D_SVT_VOID; ++ case HLSL_TYPE_UAV: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3D_SVT_RWTEXTURE1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3D_SVT_RWTEXTURE2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3D_SVT_RWTEXTURE3D; ++ case HLSL_SAMPLER_DIM_1DARRAY: ++ return D3D_SVT_RWTEXTURE1DARRAY; ++ case HLSL_SAMPLER_DIM_2DARRAY: ++ return D3D_SVT_RWTEXTURE2DARRAY; ++ default: ++ vkd3d_unreachable(); ++ } + default: + vkd3d_unreachable(); + } +@@ -2967,47 +3048,154 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ + } + } + ++struct extern_resource ++{ ++ /* var is only not NULL if this resource is a whole variable, so it may be responsible for more ++ * than one component. 
*/ ++ const struct hlsl_ir_var *var; ++ ++ char *name; ++ struct hlsl_type *data_type; ++ bool is_user_packed; ++ ++ enum hlsl_regset regset; ++ unsigned int id, bind_count; ++}; ++ + static int sm4_compare_extern_resources(const void *a, const void *b) + { +- const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; +- const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; +- enum hlsl_regset aa_regset, bb_regset; ++ const struct extern_resource *aa = (const struct extern_resource *)a; ++ const struct extern_resource *bb = (const struct extern_resource *)b; ++ int r; ++ ++ if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) ++ return r; + +- aa_regset = hlsl_type_get_regset(aa->data_type); +- bb_regset = hlsl_type_get_regset(bb->data_type); ++ return vkd3d_u32_compare(aa->id, bb->id); ++} ++ ++static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) ++{ ++ unsigned int i; + +- if (aa_regset != bb_regset) +- return aa_regset - bb_regset; ++ for (i = 0; i < count; ++i) ++ vkd3d_free(extern_resources[i].name); ++ vkd3d_free(extern_resources); ++} + +- return aa->regs[aa_regset].id - bb->regs[bb_regset].id; ++static const char *string_skip_tag(const char *string) ++{ ++ if (!strncmp(string, "", strlen(""))) ++ return string + strlen(""); ++ return string; + } + +-static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) ++static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) + { +- const struct hlsl_ir_var **extern_resources = NULL; ++ bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; ++ struct extern_resource *extern_resources = NULL; + const struct hlsl_ir_var *var; + enum hlsl_regset regset; + size_t capacity = 0; ++ char *name; + + *count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if 
(!hlsl_type_is_resource(var->data_type)) +- continue; +- regset = hlsl_type_get_regset(var->data_type); +- if (!var->regs[regset].allocated) +- continue; +- +- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, +- sizeof(*extern_resources)))) ++ if (separate_components) + { +- *count = 0; +- return NULL; ++ unsigned int component_count = hlsl_type_component_count(var->data_type); ++ unsigned int k, regset_offset; ++ ++ for (k = 0; k < component_count; ++k) ++ { ++ struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); ++ struct vkd3d_string_buffer *name_buffer; ++ ++ if (!hlsl_type_is_resource(component_type)) ++ continue; ++ ++ regset = hlsl_type_get_regset(component_type); ++ regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, regset, k); ++ ++ if (regset_offset > var->regs[regset].allocation_size) ++ continue; ++ ++ if (var->objects_usage[regset][regset_offset].used) ++ { ++ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, ++ sizeof(*extern_resources)))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ ++ if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ hlsl_release_string_buffer(ctx, name_buffer); ++ return NULL; ++ } ++ hlsl_release_string_buffer(ctx, name_buffer); ++ ++ extern_resources[*count].var = NULL; ++ ++ extern_resources[*count].name = name; ++ extern_resources[*count].data_type = component_type; ++ extern_resources[*count].is_user_packed = false; ++ ++ extern_resources[*count].regset = regset; ++ extern_resources[*count].id = var->regs[regset].id + regset_offset; ++ extern_resources[*count].bind_count = 1; ++ ++ ++*count; ++ } ++ } + } 
++ else ++ { ++ if (!hlsl_type_is_resource(var->data_type)) ++ continue; ++ regset = hlsl_type_get_regset(var->data_type); ++ if (!var->regs[regset].allocated) ++ continue; ++ ++ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, ++ sizeof(*extern_resources)))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } + +- extern_resources[*count] = var; +- ++*count; ++ if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ ++ extern_resources[*count].var = var; ++ ++ extern_resources[*count].name = name; ++ extern_resources[*count].data_type = var->data_type; ++ extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; ++ ++ extern_resources[*count].regset = regset; ++ extern_resources[*count].id = var->regs[regset].id; ++ extern_resources[*count].bind_count = var->bind_count[regset]; ++ ++ ++*count; ++ } + } + + qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); +@@ -3020,8 +3208,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + size_t cbuffers_offset, resources_offset, creator_offset, string_offset; + size_t cbuffer_position, resource_position, creator_position; + const struct hlsl_profile_info *profile = ctx->profile; +- const struct hlsl_ir_var **extern_resources; + struct vkd3d_bytecode_buffer buffer = {0}; ++ struct extern_resource *extern_resources; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + +@@ -3075,18 +3263,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + + for (i = 0; i < extern_resources_count; ++i) + { +- enum hlsl_regset regset; ++ const struct extern_resource *resource = &extern_resources[i]; + uint32_t flags = 0; + +- var = extern_resources[i]; +- regset = hlsl_type_get_regset(var->data_type); +- +- if 
(var->reg_reservation.reg_type) ++ if (resource->is_user_packed) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, sm4_resource_type(var->data_type)); +- if (regset == HLSL_REGSET_SAMPLERS) ++ put_u32(&buffer, sm4_resource_type(resource->data_type)); ++ if (resource->regset == HLSL_REGSET_SAMPLERS) + { + put_u32(&buffer, 0); + put_u32(&buffer, 0); +@@ -3094,15 +3279,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + } + else + { +- unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; ++ unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource_format->dimx; + +- put_u32(&buffer, sm4_resource_format(var->data_type)); +- put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); ++ put_u32(&buffer, sm4_resource_format(resource->data_type)); ++ put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); + put_u32(&buffer, ~0u); /* FIXME: multisample count */ + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + } +- put_u32(&buffer, var->regs[regset].id); +- put_u32(&buffer, var->regs[regset].bind_count); ++ put_u32(&buffer, resource->id); ++ put_u32(&buffer, resource->bind_count); + put_u32(&buffer, flags); + } + +@@ -3128,9 +3313,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + + for (i = 0; i < extern_resources_count; ++i) + { +- var = extern_resources[i]; ++ const struct extern_resource *resource = &extern_resources[i]; + +- string_offset = put_string(&buffer, var->name); ++ string_offset = put_string(&buffer, resource->name); + set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); + } + +@@ -3234,9 +3419,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(&buffer, creator_position, creator_offset); + +- add_section(dxbc, 
TAG_RDEF, &buffer); ++ add_section(ctx, dxbc, TAG_RDEF, &buffer); + +- vkd3d_free(extern_resources); ++ sm4_free_extern_resources(extern_resources, extern_resources_count); + } + + static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) +@@ -3308,8 +3493,8 @@ static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_mod + + struct sm4_register + { +- enum vkd3d_sm4_register_type type; +- uint32_t idx[2]; ++ enum vkd3d_shader_register_type type; ++ struct vkd3d_shader_register_index idx[2]; + unsigned int idx_count; + enum vkd3d_sm4_dimension dim; + uint32_t immconst_uint[4]; +@@ -3346,8 +3531,9 @@ struct sm4_instruction + + static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, + unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, +- const struct hlsl_deref *deref, const struct hlsl_type *data_type) ++ const struct hlsl_deref *deref) + { ++ const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); + const struct hlsl_ir_var *var = deref->var; + + if (var->is_uniform) +@@ -3356,37 +3542,37 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + + if (regset == HLSL_REGSET_TEXTURES) + { +- reg->type = VKD3D_SM4_RT_RESOURCE; ++ reg->type = VKD3DSPR_RESOURCE; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; +- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); +- assert(deref->offset_regset == HLSL_REGSET_TEXTURES); ++ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ assert(regset == HLSL_REGSET_TEXTURES); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_UAVS) + { +- reg->type = VKD3D_SM5_RT_UAV; ++ reg->type = VKD3DSPR_UAV; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = 
VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; +- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); +- assert(deref->offset_regset == HLSL_REGSET_UAVS); ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ assert(regset == HLSL_REGSET_UAVS); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_SAMPLERS) + { +- reg->type = VKD3D_SM4_RT_SAMPLER; ++ reg->type = VKD3DSPR_SAMPLER; + reg->dim = VKD3D_SM4_DIMENSION_NONE; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; +- reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; +- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); +- assert(deref->offset_regset == HLSL_REGSET_SAMPLERS); ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ assert(regset == HLSL_REGSET_SAMPLERS); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } +@@ -3395,12 +3581,12 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; + + assert(data_type->class <= HLSL_CLASS_VECTOR); +- reg->type = VKD3D_SM4_RT_CONSTBUFFER; ++ reg->type = VKD3DSPR_CONSTBUFFER; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = var->buffer->reg.id; +- reg->idx[1] = offset / 4; ++ reg->idx[0].offset = var->buffer->reg.id; ++ reg->idx[1].offset = offset / 4; + reg->idx_count = 2; + *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); + } +@@ -3415,7 +3601,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + + if (has_idx) + { +- reg->idx[0] = var->semantic.index + offset / 4; ++ reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + +@@ -3427,11 +3613,11 @@ static void 
sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); +- reg->type = VKD3D_SM4_RT_INPUT; ++ reg->type = VKD3DSPR_INPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = hlsl_reg.id; ++ reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } +@@ -3446,11 +3632,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + + if (has_idx) + { +- reg->idx[0] = var->semantic.index + offset / 4; ++ reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + +- if (reg->type == VKD3D_SM4_RT_DEPTHOUT) ++ if (reg->type == VKD3DSPR_DEPTHOUT) + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + else + reg->dim = VKD3D_SM4_DIMENSION_VEC4; +@@ -3461,9 +3647,9 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); +- reg->type = VKD3D_SM4_RT_OUTPUT; ++ reg->type = VKD3DSPR_OUTPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; +- reg->idx[0] = hlsl_reg.id; ++ reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } +@@ -3473,22 +3659,22 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); +- reg->type = VKD3D_SM4_RT_TEMP; ++ reg->type = VKD3DSPR_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = hlsl_reg.id; ++ reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + + static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, +- const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int 
map_writemask) ++ const struct hlsl_deref *deref, unsigned int map_writemask) + { + unsigned int writemask; + +- sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); ++ sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref); + if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); + } +@@ -3497,10 +3683,10 @@ static void sm4_register_from_node(struct sm4_register *reg, unsigned int *write + enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) + { + assert(instr->reg.allocated); +- reg->type = VKD3D_SM4_RT_TEMP; ++ reg->type = VKD3DSPR_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = instr->reg.id; ++ reg->idx[0].offset = instr->reg.id; + reg->idx_count = 1; + *writemask = instr->reg.writemask; + } +@@ -3516,7 +3702,7 @@ static void sm4_src_from_constant_value(struct sm4_src_register *src, + const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) + { + src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE; +- src->reg.type = VKD3D_SM4_RT_IMMCONST; ++ src->reg.type = VKD3DSPR_IMMCONST; + if (width == 1) + { + src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR; +@@ -3553,17 +3739,100 @@ static void sm4_src_from_node(struct sm4_src_register *src, + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); + } + +-static uint32_t sm4_encode_register(const struct sm4_register *reg) ++static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct sm4_dst_register *dst) + { +- return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) +- | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) +- | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); ++ const struct vkd3d_sm4_register_type_info *register_type_info; ++ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; ++ uint32_t sm4_reg_type, reg_dim; ++ uint32_t 
token = 0; ++ unsigned int j; ++ ++ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, dst->reg.type); ++ if (!register_type_info) ++ { ++ FIXME("Unhandled vkd3d-shader register type %#x.\n", dst->reg.type); ++ sm4_reg_type = VKD3D_SM4_RT_TEMP; ++ } ++ else ++ { ++ sm4_reg_type = register_type_info->sm4_type; ++ } ++ ++ reg_dim = dst->reg.dim; ++ ++ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; ++ token |= dst->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; ++ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; ++ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) ++ token |= dst->writemask << VKD3D_SM4_WRITEMASK_SHIFT; ++ put_u32(buffer, token); ++ ++ for (j = 0; j < dst->reg.idx_count; ++j) ++ { ++ put_u32(buffer, dst->reg.idx[j].offset); ++ assert(!dst->reg.idx[j].rel_addr); ++ } ++} ++ ++static void sm4_write_src_register(const struct tpf_writer *tpf, const struct sm4_src_register *src) ++{ ++ const struct vkd3d_sm4_register_type_info *register_type_info; ++ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; ++ uint32_t sm4_reg_type, reg_dim; ++ uint32_t token = 0; ++ unsigned int j; ++ ++ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, src->reg.type); ++ if (!register_type_info) ++ { ++ FIXME("Unhandled vkd3d-shader register type %#x.\n", src->reg.type); ++ sm4_reg_type = VKD3D_SM4_RT_TEMP; ++ } ++ else ++ { ++ sm4_reg_type = register_type_info->sm4_type; ++ } ++ ++ reg_dim = src->reg.dim; ++ ++ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; ++ token |= src->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; ++ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; ++ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) ++ { ++ token |= (uint32_t)src->swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; ++ token |= src->swizzle << VKD3D_SM4_SWIZZLE_SHIFT; ++ } ++ if (src->reg.mod) ++ token |= VKD3D_SM4_EXTENDED_OPERAND; ++ put_u32(buffer, token); ++ ++ if (src->reg.mod) ++ put_u32(buffer, (src->reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) ++ 
| VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); ++ ++ for (j = 0; j < src->reg.idx_count; ++j) ++ { ++ put_u32(buffer, src->reg.idx[j].offset); ++ assert(!src->reg.idx[j].rel_addr); ++ } ++ ++ if (src->reg.type == VKD3DSPR_IMMCONST) ++ { ++ put_u32(buffer, src->reg.immconst_uint[0]); ++ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) ++ { ++ put_u32(buffer, src->reg.immconst_uint[1]); ++ put_u32(buffer, src->reg.immconst_uint[2]); ++ put_u32(buffer, src->reg.immconst_uint[3]); ++ } ++ } + } + + static uint32_t sm4_register_order(const struct sm4_register *reg) + { + uint32_t order = 1; +- if (reg->type == VKD3D_SM4_RT_IMMCONST) ++ if (reg->type == VKD3DSPR_IMMCONST) + order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 4 : 1; + order += reg->idx_count; + if (reg->mod) +@@ -3571,8 +3840,9 @@ static uint32_t sm4_register_order(const struct sm4_register *reg) + return order; + } + +-static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) ++static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) + { ++ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t token = instr->opcode; + unsigned int size = 1, i, j; + +@@ -3600,43 +3870,10 @@ static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const st + } + + for (i = 0; i < instr->dst_count; ++i) +- { +- token = sm4_encode_register(&instr->dsts[i].reg); +- if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) +- token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; +- put_u32(buffer, token); +- +- for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) +- put_u32(buffer, instr->dsts[i].reg.idx[j]); +- } ++ sm4_write_dst_register(tpf, &instr->dsts[i]); + + for (i = 0; i < instr->src_count; ++i) +- { +- token = sm4_encode_register(&instr->srcs[i].reg); +- token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; +- token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; +- if 
(instr->srcs[i].reg.mod) +- token |= VKD3D_SM4_EXTENDED_OPERAND; +- put_u32(buffer, token); +- +- if (instr->srcs[i].reg.mod) +- put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) +- | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); +- +- for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) +- put_u32(buffer, instr->srcs[i].reg.idx[j]); +- +- if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) +- { +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); +- if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) +- { +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); +- } +- } +- } ++ sm4_write_src_register(tpf, &instr->srcs[i]); + + if (instr->byte_stride) + put_u32(buffer, instr->byte_stride); +@@ -3672,67 +3909,75 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, + return true; + } + +-static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) ++static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) + { + const struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, + + .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, +- .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, +- .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, ++ .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, ++ .srcs[0].reg.idx[0].offset = cbuffer->reg.id, ++ .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, + .srcs[0].reg.idx_count = 2, + .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, + .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), + .src_count = 1, + }; +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) ++static void write_sm4_dcl_samplers(const struct 
tpf_writer *tpf, const struct extern_resource *resource) + { +- unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; ++ struct hlsl_type *component_type; ++ unsigned int i; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + +- .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, ++ .dsts[0].reg.type = VKD3DSPR_SAMPLER, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + +- if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) ++ component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); ++ ++ if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + instr.opcode |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; + +- for (i = 0; i < count; ++i) ++ assert(resource->regset == HLSL_REGSET_SAMPLERS); ++ ++ for (i = 0; i < resource->bind_count; ++i) + { +- if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) ++ if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue; + +- instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id + i; +- write_sm4_instruction(buffer, &instr); ++ instr.dsts[0].reg.idx[0].offset = resource->id + i; ++ write_sm4_instruction(tpf, &instr); + } + } + +-static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_var *var, bool uav) ++static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, ++ bool uav) + { + enum hlsl_regset regset = uav ? 
HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; +- unsigned int i, count = var->data_type->reg_size[regset]; + struct hlsl_type *component_type; + struct sm4_instruction instr; ++ unsigned int i; + +- component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); ++ assert(resource->regset == regset); + +- for (i = 0; i < count; ++i) ++ component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); ++ ++ for (i = 0; i < resource->bind_count; ++i) + { +- if (!var->objects_usage[regset][i].used) ++ if (resource->var && !resource->var->objects_usage[regset][i].used) + continue; + + instr = (struct sm4_instruction) + { +- .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, +- .dsts[0].reg.idx = {var->regs[regset].id + i}, ++ .dsts[0].reg.type = uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, ++ .dsts[0].reg.idx[0].offset = resource->id + i, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + +@@ -3742,11 +3987,11 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + + if (uav) + { +- switch (var->data_type->sampler_dim) ++ switch (resource->data_type->sampler_dim) + { + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; +- instr.byte_stride = var->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; ++ instr.byte_stride = resource->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; + break; + default: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; +@@ -3765,13 +4010,13 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + } + +-static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) ++static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var 
*var) + { +- const struct hlsl_profile_info *profile = ctx->profile; ++ const struct hlsl_profile_info *profile = tpf->ctx->profile; + const bool output = var->is_output_semantic; + D3D_NAME usage; + bool has_idx; +@@ -3782,11 +4027,11 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + .dst_count = 1, + }; + +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) ++ if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) + { + if (has_idx) + { +- instr.dsts[0].reg.idx[0] = var->semantic.index; ++ instr.dsts[0].reg.idx[0].offset = var->semantic.index; + instr.dsts[0].reg.idx_count = 1; + } + else +@@ -3797,16 +4042,16 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + } + else + { +- instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; +- instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; ++ instr.dsts[0].reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; ++ instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + instr.dsts[0].reg.idx_count = 1; + instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; + } + +- if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) ++ if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) + instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + +- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); ++ hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); + if (usage == ~0u) + usage = D3D_NAME_UNDEFINED; + +@@ -3866,10 +4111,10 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + break; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) ++static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) + { + struct sm4_instruction instr = + { +@@ -3879,33 +4124,35 @@ static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t t + .idx_count = 1, + }; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) ++static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) + { + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, + +- .idx = {thread_count[0], thread_count[1], thread_count[2]}, ++ .idx[0] = thread_count[0], ++ .idx[1] = thread_count[1], ++ .idx[2] = thread_count[2], + .idx_count = 3, + }; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) ++static void write_sm4_ret(const struct tpf_writer *tpf) + { + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_RET, + }; + +- 
write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) + { + struct sm4_instruction instr; +@@ -3920,12 +4167,11 @@ static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_ + instr.srcs[0].reg.mod = src_mod; + instr.src_count = 1; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, +- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, +- const struct hlsl_ir_node *src) ++static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++ const struct hlsl_ir_node *dst, unsigned dst_idx, const struct hlsl_ir_node *src) + { + struct sm4_instruction instr; + +@@ -3935,7 +4181,7 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); +- instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; ++ instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; +@@ -3943,10 +4189,10 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe + sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); + instr.src_count = 1; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, ++static void 
write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { + struct sm4_instruction instr; +@@ -3961,11 +4207,11 @@ static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); + instr.src_count = 2; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + + /* dp# instructions don't map the swizzle. */ +-static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { + struct sm4_instruction instr; +@@ -3980,10 +4226,10 @@ static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum v + sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, ++static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, + const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { +@@ -3995,7 +4241,7 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); +- instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; ++ instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; +@@ -4004,15 +4250,15 @@ 
static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); + instr.src_count = 2; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, ++static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, + const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, + enum hlsl_sampler_dim dim) + { ++ const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); + bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); +@@ -4029,7 +4275,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { +- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } +@@ -4052,7 +4298,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + + sm4_src_from_node(&instr.srcs[0], coords, coords_writemask); + +- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); + + instr.src_count = 2; + +@@ -4067,13 +4313,13 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + + memset(&instr.srcs[2], 0, 
sizeof(instr.srcs[2])); + instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; +- reg->type = VKD3D_SM4_RT_IMMCONST; ++ reg->type = VKD3DSPR_IMMCONST; + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + reg->immconst_uint[0] = index->value.u[0].u; + } +- else if (ctx->profile->major_version == 4 && ctx->profile->minor_version == 0) ++ else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) + { +- hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); ++ hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + } + else + { +@@ -4083,13 +4329,11 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + ++instr.src_count; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_resource_load *load) ++static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) + { +- const struct hlsl_type *resource_type = load->resource.var->data_type; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; +@@ -4132,7 +4376,7 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { +- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } +@@ -4142,8 +4386,8 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, 
VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); +- sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 3; + + if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD +@@ -4165,7 +4409,52 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + ++instr.src_count; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *dst = &load->node; ++ struct sm4_instruction instr; ++ ++ assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); ++ ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; ++ if (dst->data_type->base_type == HLSL_TYPE_UINT) ++ instr.opcode |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; ++ ++ sm4_dst_from_node(&instr.dsts[0], dst); ++ instr.dst_count = 1; ++ ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], resource, instr.dsts[0].writemask); ++ instr.src_count = 1; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *dst = &load->node; ++ struct sm4_instruction instr; ++ ++ assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); ++ ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_RESINFO; ++ if (dst->data_type->base_type == HLSL_TYPE_UINT) ++ instr.opcode |= 
VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; ++ ++ sm4_dst_from_node(&instr.dsts[0], dst); ++ instr.dst_count = 1; ++ ++ sm4_src_from_node(&instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); ++ instr.src_count = 2; ++ ++ write_sm4_instruction(tpf, &instr); + } + + static bool type_is_float(const struct hlsl_type *type) +@@ -4173,8 +4462,7 @@ static bool type_is_float(const struct hlsl_type *type) + return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; + } + +-static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, ++static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, + const struct hlsl_ir_node *arg, uint32_t mask) + { + struct sm4_instruction instr; +@@ -4187,16 +4475,15 @@ static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, + + sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); + instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; +- instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; ++ instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; + instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + instr.srcs[1].reg.immconst_uint[0] = mask; + instr.src_count = 2; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_cast(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) ++static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) + { + static const union + { +@@ -4218,23 +4505,23 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: +- 
write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); ++ write_sm4_cast_from_bool(tpf, expr, arg1, one.u); + break; + + case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); + break; + + default: +@@ -4247,20 +4534,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); ++ write_sm4_cast_from_bool(tpf, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); + break; + + default: +@@ -4273,20 +4560,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(ctx, 
buffer, expr, arg1, 1); ++ write_sm4_cast_from_bool(tpf, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); + break; + + default: +@@ -4295,7 +4582,7 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, + break; + + case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double."); + break; + + case HLSL_TYPE_BOOL: +@@ -4305,26 +4592,25 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, + } + } + +-static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) ++static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, ++ const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) + { + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; + +- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); ++ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_expr(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) ++static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) + { + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_ir_node *arg2 = expr->operands[1].node; +@@ -4333,7 +4619,7 @@ static void write_sm4_expr(struct 
hlsl_ctx *ctx, + + assert(expr->node.reg.allocated); + +- if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) ++ if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) + return; + + switch (expr->op) +@@ -4342,161 +4628,181 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_BIT_NOT: + assert(type_is_integer(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_CAST: +- write_sm4_cast(ctx, buffer, expr); ++ write_sm4_cast(tpf, expr); + break; + + case HLSL_OP1_COS: + assert(type_is_float(dst_type)); +- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); ++ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); + break; + + case HLSL_OP1_DSX: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); ++ break; ++ ++ case HLSL_OP1_DSX_COARSE: ++ assert(type_is_float(dst_type)); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); ++ break; ++ ++ case HLSL_OP1_DSX_FINE: ++ assert(type_is_float(dst_type)); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, 
VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); ++ break; ++ ++ case HLSL_OP1_DSY_COARSE: ++ assert(type_is_float(dst_type)); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); ++ break; ++ ++ case HLSL_OP1_DSY_FINE: ++ assert(type_is_float(dst_type)); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_EXP2: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FLOOR: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FRACT: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOG2: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOGIC_NOT: + assert(dst_type->base_type == HLSL_TYPE_BOOL); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_NEG: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); + 
break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_REINTERPRET: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_OP1_ROUND: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_RSQ: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); + break; + + case HLSL_OP1_SAT: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV + | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), + &expr->node, arg1, 0); + break; + + case HLSL_OP1_SIN: + assert(type_is_float(dst_type)); +- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); ++ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); + break; + + case HLSL_OP1_SQRT: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_TRUNC: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); + break; + + case HLSL_OP2_ADD: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); 
+ break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_BIT_AND: + assert(type_is_integer(dst_type)); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_OR: + assert(type_is_integer(dst_type)); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_XOR: + assert(type_is_integer(dst_type)); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_DIV: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: +- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); ++ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); + } + break; + +@@ -4507,15 +4813,15 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (arg1->data_type->dimx) + { + case 4: +- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); ++ 
write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); + break; + + case 3: +- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); ++ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); + break; + + case 2: +- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); ++ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); + break; + + case 1: +@@ -4525,7 +4831,7 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); + } + break; + +@@ -4538,18 +4844,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; +@@ -4564,21 +4870,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); + break; + + case 
HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; +@@ -4593,21 +4899,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; +@@ -4615,37 +4921,37 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + + case HLSL_OP2_LOGIC_AND: + assert(dst_type->base_type == HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LOGIC_OR: + assert(dst_type->base_type == HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case 
HLSL_OP2_LSHIFT: + assert(type_is_integer(dst_type)); + assert(dst_type->base_type != HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MAX: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); + } + break; + +@@ -4653,19 +4959,19 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); + } + break; + +@@ -4673,11 +4979,11 @@ static void 
write_sm4_expr(struct hlsl_ctx *ctx, + switch (dst_type->base_type) + { + case HLSL_TYPE_UINT: +- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); ++ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); + } + break; + +@@ -4685,18 +4991,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + /* Using IMUL instead of UMUL because we're taking the low + * bits, and the native compiler generates IMUL. */ +- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); ++ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); + } + break; + +@@ -4709,18 +5015,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between 
\"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; +@@ -4729,18 +5035,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + case HLSL_OP2_RSHIFT: + assert(type_is_integer(dst_type)); + assert(dst_type->base_type != HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, ++ write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, + &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); + } + +- hlsl_release_string_buffer(ctx, dst_type_string); ++ hlsl_release_string_buffer(tpf->ctx, dst_type_string); + } + +-static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) ++static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) + { + struct sm4_instruction instr = + { +@@ -4751,26 +5057,25 @@ static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + assert(iff->condition.node->data_type->dimx == 1); + + sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + +- write_sm4_block(ctx, buffer, &iff->then_block); ++ write_sm4_block(tpf, &iff->then_block); + + if (!list_empty(&iff->else_block.instrs)) + { + instr.opcode = VKD3D_SM4_OP_ELSE; + instr.src_count = 0; +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + +- write_sm4_block(ctx, buffer, &iff->else_block); ++ write_sm4_block(tpf, &iff->else_block); + } + + instr.opcode = VKD3D_SM4_OP_ENDIF; + instr.src_count = 0; +- 
write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_jump(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) ++static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) + { + struct sm4_instruction instr = {0}; + +@@ -4780,19 +5085,13 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, + instr.opcode = VKD3D_SM4_OP_BREAK; + break; + +- case HLSL_IR_JUMP_DISCARD: ++ case HLSL_IR_JUMP_DISCARD_NZ: + { +- struct sm4_register *reg = &instr.srcs[0].reg; +- + instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; + + memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); +- instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + instr.src_count = 1; +- reg->type = VKD3D_SM4_RT_IMMCONST; +- reg->dim = VKD3D_SM4_DIMENSION_SCALAR; +- reg->immconst_uint[0] = ~0u; +- ++ sm4_src_from_node(&instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); + break; + } + +@@ -4800,11 +5099,11 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, + vkd3d_unreachable(); + + default: +- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); ++ hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + return; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + + /* Does this variable's data come directly from the API user, rather than being +@@ -4818,8 +5117,7 @@ static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *va + return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; + } + +-static void write_sm4_load(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) ++static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) + { + const struct hlsl_type *type = load->node.data_type; + struct sm4_instruction instr; +@@ -4830,7 
+5128,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, + instr.dst_count = 1; + + assert(type->class <= HLSL_CLASS_LAST_NUMERIC); +- if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(ctx, load->src.var)) ++ if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) + { + struct hlsl_constant_value value; + +@@ -4839,7 +5137,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, + + instr.opcode = VKD3D_SM4_OP_MOVC; + +- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); + + memset(&value, 0xff, sizeof(value)); + sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); +@@ -4851,33 +5149,31 @@ static void write_sm4_load(struct hlsl_ctx *ctx, + { + instr.opcode = VKD3D_SM4_OP_MOV; + +- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); + instr.src_count = 1; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_loop(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) ++static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) + { + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_LOOP, + }; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + +- write_sm4_block(ctx, buffer, &loop->body); ++ write_sm4_block(tpf, &loop->body); + + instr.opcode = VKD3D_SM4_OP_ENDLOOP; +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, +- const struct hlsl_deref *resource, const struct hlsl_deref *sampler, 
+- const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) ++static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, ++ const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, ++ unsigned int swizzle, const struct hlsl_ir_node *texel_offset) + { + struct sm4_src_register *src; + struct sm4_instruction instr; +@@ -4895,9 +5191,9 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { +- if (ctx->profile->major_version < 5) ++ if (tpf->ctx->profile->major_version < 5) + { +- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); + return; + } +@@ -4906,58 +5202,39 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + } + } + +- sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask); + + src = &instr.srcs[instr.src_count++]; +- sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); ++ sm4_src_from_deref(tpf->ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); + src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; + src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; + src->swizzle = swizzle; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_resource_load(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) ++static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) + { +- const struct hlsl_type 
*resource_type = load->resource.var->data_type; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *sample_index = load->sample_index.node; + const struct hlsl_ir_node *coords = load->coords.node; + +- if (!hlsl_type_is_resource(resource_type)) ++ if (load->sampler.var && !load->sampler.var->is_uniform) + { +- hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); ++ hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); + return; + } + +- if (load->sampler.var) +- { +- const struct hlsl_type *sampler_type = load->sampler.var->data_type; +- +- if (!hlsl_type_is_resource(sampler_type)) +- { +- hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); +- return; +- } +- +- if (!load->sampler.var->is_uniform) +- { +- hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); +- return; +- } +- } +- + if (!load->resource.var->is_uniform) + { +- hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); ++ hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); + return; + } + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: +- write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, ++ write_sm4_ld(tpf, &load->node, &load->resource, + coords, sample_index, texel_offset, load->sampling_dim); + break; + +@@ -4967,64 +5244,61 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + case HLSL_RESOURCE_SAMPLE_GRAD: +- if (!load->sampler.var) +- { +- hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); +- return; +- } +- write_sm4_sample(ctx, buffer, load); ++ /* Combined sample expressions were lowered. 
*/ ++ assert(load->sampler.var); ++ write_sm4_sample(tpf, load); + break; + + case HLSL_RESOURCE_GATHER_RED: +- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, +- &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, ++ HLSL_SWIZZLE(X, X, X, X), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_GREEN: +- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, +- &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, ++ HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_BLUE: +- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, +- &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, ++ HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_ALPHA: +- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, +- &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, ++ HLSL_SWIZZLE(W, W, W, W), texel_offset); ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_INFO: ++ write_sm4_sampleinfo(tpf, load); ++ break; ++ ++ case HLSL_RESOURCE_RESINFO: ++ write_sm4_resinfo(tpf, load); + break; + } + } + +-static void write_sm4_resource_store(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) ++static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) + { +- const struct hlsl_type *resource_type = store->resource.var->data_type; +- +- if (!hlsl_type_is_resource(resource_type)) +- { +- hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); +- return; +- } ++ 
struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); + + if (!store->resource.var->is_uniform) + { +- hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); ++ hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); + return; + } + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { +- hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); ++ hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); + return; + } + +- write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); ++ write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); + } + +-static void write_sm4_store(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) ++static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) + { + const struct hlsl_ir_node *rhs = store->rhs.node; + struct sm4_instruction instr; +@@ -5033,18 +5307,17 @@ static void write_sm4_store(struct hlsl_ctx *ctx, + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + +- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); ++ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); + instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); + instr.src_count = 1; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_swizzle(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) ++static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) + { + struct sm4_instruction instr; + unsigned 
int writemask; +@@ -5060,11 +5333,10 @@ static void write_sm4_swizzle(struct hlsl_ctx *ctx, + swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); + instr.src_count = 1; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_block *block) ++static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) + { + const struct hlsl_ir_node *instr; + +@@ -5074,12 +5346,12 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + { + if (instr->data_type->class == HLSL_CLASS_MATRIX) + { +- hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); ++ hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); + break; + } + else if (instr->data_type->class == HLSL_CLASS_OBJECT) + { +- hlsl_fixme(ctx, &instr->loc, "Object copy."); ++ hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); + break; + } + +@@ -5099,43 +5371,43 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + vkd3d_unreachable(); + + case HLSL_IR_EXPR: +- write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); ++ write_sm4_expr(tpf, hlsl_ir_expr(instr)); + break; + + case HLSL_IR_IF: +- write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); ++ write_sm4_if(tpf, hlsl_ir_if(instr)); + break; + + case HLSL_IR_JUMP: +- write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); ++ write_sm4_jump(tpf, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_LOAD: +- write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); ++ write_sm4_load(tpf, hlsl_ir_load(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: +- write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); ++ write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_RESOURCE_STORE: +- write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); ++ 
write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); + break; + + case HLSL_IR_LOOP: +- write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); ++ write_sm4_loop(tpf, hlsl_ir_loop(instr)); + break; + + case HLSL_IR_STORE: +- write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); ++ write_sm4_store(tpf, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWIZZLE: +- write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); ++ write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); + break; + + default: +- hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); ++ hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + } + } + } +@@ -5144,12 +5416,13 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) + { + const struct hlsl_profile_info *profile = ctx->profile; +- const struct hlsl_ir_var **extern_resources; + struct vkd3d_bytecode_buffer buffer = {0}; ++ struct extern_resource *extern_resources; + unsigned int extern_resources_count, i; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + size_t token_count_position; ++ struct tpf_writer tpf; + + static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = + { +@@ -5164,6 +5437,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + VKD3D_SM4_LIB, + }; + ++ tpf_writer_init(&tpf, ctx, &buffer); ++ + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); +@@ -5172,45 +5447,42 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) +- write_sm4_dcl_constant_buffer(&buffer, cbuffer); ++ write_sm4_dcl_constant_buffer(&tpf, cbuffer); + } + + for (i = 0; i < extern_resources_count; ++i) + { +- enum hlsl_regset regset; 
++ const struct extern_resource *resource = &extern_resources[i]; + +- var = extern_resources[i]; +- regset = hlsl_type_get_regset(var->data_type); +- +- if (regset == HLSL_REGSET_SAMPLERS) +- write_sm4_dcl_samplers(&buffer, var); +- else if (regset == HLSL_REGSET_TEXTURES) +- write_sm4_dcl_textures(ctx, &buffer, var, false); +- else if (regset == HLSL_REGSET_UAVS) +- write_sm4_dcl_textures(ctx, &buffer, var, true); ++ if (resource->regset == HLSL_REGSET_SAMPLERS) ++ write_sm4_dcl_samplers(&tpf, resource); ++ else if (resource->regset == HLSL_REGSET_TEXTURES) ++ write_sm4_dcl_textures(&tpf, resource, false); ++ else if (resource->regset == HLSL_REGSET_UAVS) ++ write_sm4_dcl_textures(&tpf, resource, true); + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) +- write_sm4_dcl_semantic(ctx, &buffer, var); ++ write_sm4_dcl_semantic(&tpf, var); + } + + if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) +- write_sm4_dcl_thread_group(&buffer, ctx->thread_count); ++ write_sm4_dcl_thread_group(&tpf, ctx->thread_count); + + if (ctx->temp_count) +- write_sm4_dcl_temps(&buffer, ctx->temp_count); ++ write_sm4_dcl_temps(&tpf, ctx->temp_count); + +- write_sm4_block(ctx, &buffer, &entry_func->body); ++ write_sm4_block(&tpf, &entry_func->body); + +- write_sm4_ret(&buffer); ++ write_sm4_ret(&tpf); + + set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); + +- add_section(dxbc, TAG_SHDR, &buffer); ++ add_section(ctx, dxbc, TAG_SHDR, &buffer); + +- vkd3d_free(extern_resources); ++ sm4_free_extern_resources(extern_resources, extern_resources_count); + } + + int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index 343fdb2252e..c777bad2206 100644 
+--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -415,6 +415,8 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t + return "hlsl"; + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: + return "d3dbc"; ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: ++ return "dxil"; + default: + FIXME("Unhandled source type %#x.\n", type); + return "bin"; +@@ -440,6 +442,18 @@ void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type source_type, + shader_get_source_type_suffix(source_type), shader->code, shader->size); + } + ++static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) ++{ ++ struct vkd3d_shader_scan_signature_info *signature_info; ++ ++ if ((signature_info = vkd3d_find_struct(info->next, SCAN_SIGNATURE_INFO))) ++ { ++ memset(&signature_info->input, 0, sizeof(signature_info->input)); ++ memset(&signature_info->output, 0, sizeof(signature_info->output)); ++ memset(&signature_info->patch_constant, 0, sizeof(signature_info->patch_constant)); ++ } ++} ++ + bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, + struct vkd3d_shader_message_context *message_context, const char *source_name, + const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, +@@ -526,9 +540,46 @@ void vkd3d_shader_free_messages(char *messages) + vkd3d_free(messages); + } + ++static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, ++ const struct shader_signature *src) ++{ ++ unsigned int i; ++ ++ signature->element_count = src->element_count; ++ if (!src->elements) ++ { ++ assert(!signature->element_count); ++ signature->elements = NULL; ++ return true; ++ } ++ ++ if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) ++ return false; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ struct vkd3d_shader_signature_element *d = &signature->elements[i]; ++ 
struct signature_element *e = &src->elements[i]; ++ ++ d->semantic_name = e->semantic_name; ++ d->semantic_index = e->semantic_index; ++ d->stream_index = e->stream_index; ++ d->sysval_semantic = e->sysval_semantic; ++ d->component_type = e->component_type; ++ d->register_index = e->register_index; ++ if (e->register_count > 1) ++ FIXME("Arrayed elements are not supported yet.\n"); ++ d->mask = e->mask; ++ d->used_mask = e->used_mask; ++ d->min_precision = e->min_precision; ++ } ++ ++ return true; ++} ++ + struct vkd3d_shader_scan_context + { +- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; ++ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; + size_t descriptors_size; + + struct vkd3d_shader_message_context *message_context; +@@ -548,20 +599,12 @@ struct vkd3d_shader_scan_context + size_t cf_info_size; + size_t cf_info_count; + +- struct +- { +- unsigned int id; +- unsigned int descriptor_idx; +- } *uav_ranges; +- size_t uav_ranges_size; +- size_t uav_range_count; +- + enum vkd3d_shader_api_version api_version; + }; + + static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context) + { + unsigned int i; +@@ -584,7 +627,6 @@ static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *con + + static void vkd3d_shader_scan_context_cleanup(struct vkd3d_shader_scan_context *context) + { +- vkd3d_free(context->uav_ranges); + vkd3d_free(context->cf_info); + } + +@@ -652,18 +694,23 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_find_innermost_loop_cf_inf + return NULL; + } + +-static struct vkd3d_shader_descriptor_info *vkd3d_shader_scan_get_uav_descriptor_info( +- const struct vkd3d_shader_scan_context *context, unsigned int range_id) ++static 
void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_context *context, ++ const struct vkd3d_shader_register *reg, uint32_t flag) + { ++ unsigned int range_id = reg->idx[0].offset; + unsigned int i; + +- for (i = 0; i < context->uav_range_count; ++i) ++ if (!context->scan_descriptor_info) ++ return; ++ ++ for (i = 0; i < context->scan_descriptor_info->descriptor_count; ++i) + { +- if (context->uav_ranges[i].id == range_id) +- return &context->scan_descriptor_info->descriptors[context->uav_ranges[i].descriptor_idx]; ++ if (context->scan_descriptor_info->descriptors[i].register_id == range_id) ++ { ++ context->scan_descriptor_info->descriptors[i].flags |= flag; ++ break; ++ } + } +- +- return NULL; + } + + static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) +@@ -679,13 +726,7 @@ static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instr + static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg) + { +- struct vkd3d_shader_descriptor_info *d; +- +- if (!context->scan_descriptor_info) +- return; +- +- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); +- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ; ++ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); + } + + static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) +@@ -698,13 +739,7 @@ static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_in + static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg) + { +- struct vkd3d_shader_descriptor_info *d; +- +- if (!context->scan_descriptor_info) +- return; +- +- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); +- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER; ++ 
vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER); + } + + static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) +@@ -717,22 +752,16 @@ static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_ + static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg) + { +- struct vkd3d_shader_descriptor_info *d; +- +- if (!context->scan_descriptor_info) +- return; +- +- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); +- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS; ++ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS); + } + + static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, +- enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register_range *range, +- enum vkd3d_shader_resource_type resource_type, enum vkd3d_shader_resource_data_type resource_data_type, +- unsigned int flags) ++ enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register *reg, ++ const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, ++ enum vkd3d_shader_resource_data_type resource_data_type, unsigned int flags) + { +- struct vkd3d_shader_scan_descriptor_info *info = context->scan_descriptor_info; +- struct vkd3d_shader_descriptor_info *d; ++ struct vkd3d_shader_scan_descriptor_info1 *info = context->scan_descriptor_info; ++ struct vkd3d_shader_descriptor_info1 *d; + + if (!vkd3d_array_reserve((void **)&info->descriptors, &context->descriptors_size, + info->descriptor_count + 1, sizeof(*info->descriptors))) +@@ -743,6 +772,7 @@ static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *c + + d = &info->descriptors[info->descriptor_count]; + d->type = type; ++ d->register_id = reg->idx[0].offset; + 
d->register_space = range->space; + d->register_index = range->first; + d->resource_type = resource_type; +@@ -754,23 +784,6 @@ static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *c + return true; + } + +-static bool vkd3d_shader_scan_add_uav_range(struct vkd3d_shader_scan_context *context, +- unsigned int id, unsigned int descriptor_idx) +-{ +- if (!vkd3d_array_reserve((void **)&context->uav_ranges, &context->uav_ranges_size, +- context->uav_range_count + 1, sizeof(*context->uav_ranges))) +- { +- ERR("Failed to allocate UAV range.\n"); +- return false; +- } +- +- context->uav_ranges[context->uav_range_count].id = id; +- context->uav_ranges[context->uav_range_count].descriptor_idx = descriptor_idx; +- ++context->uav_range_count; +- +- return true; +-} +- + static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_instruction *instruction) + { +@@ -779,7 +792,7 @@ static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_sc + if (!context->scan_descriptor_info) + return; + +- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->range, ++ vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->src.reg, &cb->range, + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); + } + +@@ -796,7 +809,7 @@ static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_conte + flags = VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; + else + flags = 0; +- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->range, ++ vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->src.reg, &sampler->range, + VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT, flags); + } + +@@ -813,10 +826,8 @@ static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_cont + type = 
VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + else + type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; +- vkd3d_shader_scan_add_descriptor(context, type, &resource->range, resource_type, resource_data_type, 0); +- if (type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) +- vkd3d_shader_scan_add_uav_range(context, resource->reg.reg.idx[0].offset, +- context->scan_descriptor_info->descriptor_count - 1); ++ vkd3d_shader_scan_add_descriptor(context, type, &resource->reg.reg, &resource->range, ++ resource_type, resource_data_type, 0); + } + + static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_scan_context *context, +@@ -1066,22 +1077,64 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte + return VKD3D_OK; + } + ++static enum vkd3d_result convert_descriptor_info(struct vkd3d_shader_scan_descriptor_info *info, ++ const struct vkd3d_shader_scan_descriptor_info1 *info1) ++{ ++ unsigned int i; ++ ++ if (!(info->descriptors = vkd3d_calloc(info1->descriptor_count, sizeof(*info->descriptors)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ for (i = 0; i < info1->descriptor_count; ++i) ++ { ++ const struct vkd3d_shader_descriptor_info1 *src = &info1->descriptors[i]; ++ struct vkd3d_shader_descriptor_info *dst = &info->descriptors[i]; ++ ++ dst->type = src->type; ++ dst->register_space = src->register_space; ++ dst->register_index = src->register_index; ++ dst->resource_type = src->resource_type; ++ dst->resource_data_type = src->resource_data_type; ++ dst->flags = src->flags; ++ dst->count = src->count; ++ } ++ info->descriptor_count = info1->descriptor_count; ++ ++ return VKD3D_OK; ++} ++ ++static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) ++{ ++ TRACE("scan_descriptor_info %p.\n", scan_descriptor_info); ++ ++ vkd3d_free(scan_descriptor_info->descriptors); ++} ++ + static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context 
*message_context, struct vkd3d_shader_parser *parser) ++ struct vkd3d_shader_message_context *message_context, ++ struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) + { +- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; ++ struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; ++ struct vkd3d_shader_scan_descriptor_info *descriptor_info; ++ struct vkd3d_shader_scan_signature_info *signature_info; + struct vkd3d_shader_instruction *instruction; + struct vkd3d_shader_scan_context context; + int ret = VKD3D_OK; + unsigned int i; + +- if ((scan_descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO))) ++ descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO); ++ if (descriptor_info1) ++ { ++ descriptor_info1->descriptors = NULL; ++ descriptor_info1->descriptor_count = 0; ++ } ++ else if (descriptor_info) + { +- scan_descriptor_info->descriptors = NULL; +- scan_descriptor_info->descriptor_count = 0; ++ descriptor_info1 = &local_descriptor_info1; + } ++ signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO); + +- vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); ++ vkd3d_shader_scan_context_init(&context, compile_info, descriptor_info1, message_context); + + if (TRACE_ON()) + { +@@ -1092,13 +1145,47 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + { + instruction = &parser->instructions.elements[i]; + if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) +- { +- if (scan_descriptor_info) +- vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); + break; ++ } ++ ++ for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) ++ { ++ struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; ++ struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; ++ ++ if 
(parser->shader_desc.flat_constant_count[i].external) ++ vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ®, ++ &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); ++ } ++ ++ if (!ret && signature_info) ++ { ++ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) ++ || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, ++ &parser->shader_desc.output_signature) ++ || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, ++ &parser->shader_desc.patch_constant_signature)) ++ { ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; + } + } + ++ if (!ret && descriptor_info) ++ ret = convert_descriptor_info(descriptor_info, descriptor_info1); ++ ++ if (ret < 0) ++ { ++ if (descriptor_info) ++ vkd3d_shader_free_scan_descriptor_info(descriptor_info); ++ if (descriptor_info1) ++ vkd3d_shader_free_scan_descriptor_info1(descriptor_info1); ++ if (signature_info) ++ vkd3d_shader_free_scan_signature_info(signature_info); ++ } ++ else ++ { ++ vkd3d_shader_free_scan_descriptor_info1(&local_descriptor_info1); ++ } + vkd3d_shader_scan_context_cleanup(&context); + return ret; + } +@@ -1115,7 +1202,7 @@ static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, + return ret; + } + +- ret = scan_with_parser(compile_info, message_context, parser); ++ ret = scan_with_parser(compile_info, message_context, NULL, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +@@ -1133,7 +1220,25 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, + return ret; + } + +- ret = scan_with_parser(compile_info, message_context, parser); ++ ret = scan_with_parser(compile_info, message_context, NULL, parser); ++ vkd3d_shader_parser_destroy(parser); ++ ++ return ret; ++} ++ ++static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ struct 
vkd3d_shader_parser *parser; ++ int ret; ++ ++ if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) ++ { ++ WARN("Failed to initialise shader parser.\n"); ++ return ret; ++ } ++ ++ ret = scan_with_parser(compile_info, message_context, NULL, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +@@ -1152,6 +1257,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + if ((ret = vkd3d_shader_validate_compile_info(compile_info, false)) < 0) + return ret; + ++ init_scan_signature_info(compile_info); ++ + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); + + switch (compile_info->source_type) +@@ -1169,6 +1276,10 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + ret = scan_d3dbc(compile_info, &message_context); + break; + ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: ++ ret = scan_dxil(compile_info, &message_context); ++ break; ++ + default: + ERR("Unsupported source type %#x.\n", compile_info->source_type); + ret = VKD3D_ERROR_INVALID_ARGUMENT; +@@ -1186,7 +1297,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { +- struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; ++ struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; + struct vkd3d_glsl_generator *glsl_generator; + struct vkd3d_shader_compile_info scan_info; + int ret; +@@ -1194,11 +1305,8 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); + + scan_info = *compile_info; +- scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; +- scan_descriptor_info.next = scan_info.next; +- scan_info.next = &scan_descriptor_info; + +- if ((ret = 
scan_with_parser(&scan_info, message_context, parser)) < 0) ++ if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) + return ret; + + switch (compile_info->target_type) +@@ -1212,7 +1320,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + message_context, &parser->location))) + { + ERR("Failed to create GLSL generator.\n"); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); ++ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + return VKD3D_ERROR; + } + +@@ -1230,7 +1338,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + assert(0); + } + +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); ++ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + return ret; + } + +@@ -1291,6 +1399,24 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ + return VKD3D_ERROR; + } + ++static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) ++{ ++ struct vkd3d_shader_parser *parser; ++ int ret; ++ ++ if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) ++ { ++ WARN("Failed to initialise shader parser.\n"); ++ return ret; ++ } ++ ++ ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); ++ ++ vkd3d_shader_parser_destroy(parser); ++ return ret; ++} ++ + int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, char **messages) + { +@@ -1305,6 +1431,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + if ((ret = vkd3d_shader_validate_compile_info(compile_info, true)) < 0) + return ret; + ++ init_scan_signature_info(compile_info); ++ + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); + + switch (compile_info->source_type) 
+@@ -1321,6 +1449,10 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + ret = compile_d3d_bytecode(compile_info, out, &message_context); + break; + ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: ++ ret = compile_dxbc_dxil(compile_info, out, &message_context); ++ break; ++ + default: + vkd3d_unreachable(); + } +@@ -1339,6 +1471,15 @@ void vkd3d_shader_free_scan_descriptor_info(struct vkd3d_shader_scan_descriptor_ + vkd3d_free(scan_descriptor_info->descriptors); + } + ++void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info) ++{ ++ TRACE("info %p.\n", info); ++ ++ vkd3d_shader_free_shader_signature(&info->input); ++ vkd3d_shader_free_shader_signature(&info->output); ++ vkd3d_shader_free_shader_signature(&info->patch_constant); ++} ++ + void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code) + { + TRACE("shader_code %p.\n", shader_code); +@@ -1401,43 +1542,6 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu + desc->version = 0; + } + +-static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, +- const struct shader_signature *src) +-{ +- unsigned int i; +- +- signature->element_count = src->element_count; +- if (!src->elements) +- { +- assert(!signature->element_count); +- signature->elements = NULL; +- return true; +- } +- +- if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) +- return false; +- +- for (i = 0; i < signature->element_count; ++i) +- { +- struct vkd3d_shader_signature_element *d = &signature->elements[i]; +- struct signature_element *e = &src->elements[i]; +- +- d->semantic_name = e->semantic_name; +- d->semantic_index = e->semantic_index; +- d->stream_index = e->stream_index; +- d->sysval_semantic = e->sysval_semantic; +- d->component_type = e->component_type; +- d->register_index = e->register_index; +- if (e->register_count > 1) +- FIXME("Arrayed 
elements are not supported yet.\n"); +- d->mask = e->mask; +- d->used_mask = e->used_mask; +- d->min_precision = e->min_precision; +- } +- +- return true; +-} +- + void shader_signature_cleanup(struct shader_signature *signature) + { + vkd3d_free(signature->elements); +@@ -1526,6 +1630,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns + VKD3D_SHADER_SOURCE_DXBC_TPF, + VKD3D_SHADER_SOURCE_HLSL, + VKD3D_SHADER_SOURCE_D3D_BYTECODE, ++ VKD3D_SHADER_SOURCE_DXBC_DXIL, + }; + + TRACE("count %p.\n", count); +@@ -1564,6 +1669,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + + switch (source_type) + { ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: + case VKD3D_SHADER_SOURCE_DXBC_TPF: + *count = ARRAY_SIZE(dxbc_tpf_types); + return dxbc_tpf_types; +@@ -1792,3 +1898,41 @@ void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *ins + vkd3d_free(instructions->icbs[i]); + vkd3d_free(instructions->icbs); + } ++ ++void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, ++ const struct vkd3d_shader_signature *input_signature, ++ unsigned int *ret_count, struct vkd3d_shader_varying_map *varyings) ++{ ++ unsigned int count = 0; ++ unsigned int i; ++ ++ TRACE("output_signature %p, input_signature %p, ret_count %p, varyings %p.\n", ++ output_signature, input_signature, ret_count, varyings); ++ ++ for (i = 0; i < input_signature->element_count; ++i) ++ { ++ const struct vkd3d_shader_signature_element *input_element, *output_element; ++ ++ input_element = &input_signature->elements[i]; ++ ++ if (input_element->sysval_semantic != VKD3D_SHADER_SV_NONE) ++ continue; ++ ++ varyings[count].input_register_index = input_element->register_index; ++ varyings[count].input_mask = input_element->mask; ++ ++ if ((output_element = vkd3d_shader_find_signature_element(output_signature, ++ input_element->semantic_name, input_element->semantic_index, 0))) ++ { ++ 
varyings[count].output_signature_index = output_element - output_signature->elements; ++ } ++ else ++ { ++ varyings[count].output_signature_index = output_signature->element_count; ++ } ++ ++ ++count; ++ } ++ ++ *ret_count = count; ++} +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 406d53a3391..c719085e11f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -133,10 +133,13 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, + VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, + VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, ++ VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, + VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE = 5302, ++ VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, ++ VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, + + VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, + +@@ -145,8 +148,31 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE = 7002, + VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE = 7003, + VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, ++ VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, ++ VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, + + VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, ++ ++ VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY = 8000, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE = 8001, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET = 8002, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE = 8003, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE = 8004, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT = 8005, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE = 8006, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB = 8007, ++ VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT = 8008, ++ 
VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL = 8009, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID = 8010, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE = 8011, ++ ++ VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER = 8300, ++ VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE = 8301, ++ VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH = 8302, ++ VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH = 8303, ++ VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS = 8304, ++ ++ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, + }; + + enum vkd3d_shader_opcode +@@ -486,6 +512,9 @@ enum vkd3d_shader_register_type + VKD3DSPR_DEPTHOUTLE, + VKD3DSPR_RASTERIZER, + VKD3DSPR_OUTSTENCILREF, ++ VKD3DSPR_UNDEF, ++ ++ VKD3DSPR_COUNT, + + VKD3DSPR_INVALID = ~0u, + }; +@@ -516,6 +545,7 @@ enum vkd3d_data_type + VKD3D_DATA_DOUBLE, + VKD3D_DATA_CONTINUED, + VKD3D_DATA_UNUSED, ++ VKD3D_DATA_UINT8, + }; + + enum vkd3d_immconst_type +@@ -784,6 +814,8 @@ enum vkd3d_shader_input_sysval_semantic + VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, + }; + ++#define SIGNATURE_TARGET_LOCATION_UNUSED (~0u) ++ + struct signature_element + { + unsigned int sort_index; +@@ -792,16 +824,21 @@ struct signature_element + unsigned int stream_index; + enum vkd3d_shader_sysval_semantic sysval_semantic; + enum vkd3d_shader_component_type component_type; ++ /* Register index in the source shader. */ + unsigned int register_index; + unsigned int register_count; + unsigned int mask; + unsigned int used_mask; + enum vkd3d_shader_minimum_precision min_precision; ++ /* Register index / location in the target shader. ++ * If SIGNATURE_TARGET_LOCATION_UNUSED, this element should not be written. 
*/ ++ unsigned int target_location; + }; + + struct shader_signature + { + struct signature_element *elements; ++ size_t elements_capacity; + unsigned int element_count; + }; + +@@ -811,9 +848,17 @@ struct vkd3d_shader_desc + { + const uint32_t *byte_code; + size_t byte_code_size; ++ bool is_dxil; + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; ++ ++ uint32_t temp_count; ++ ++ struct ++ { ++ uint32_t used, external; ++ } flat_constant_count[3]; + }; + + struct vkd3d_shader_register_semantic +@@ -945,6 +990,8 @@ struct vkd3d_shader_instruction + } declaration; + }; + ++void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx); ++ + static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) + { + return ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w; +@@ -1066,6 +1113,24 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse + parser->ops->parser_destroy(parser); + } + ++struct vkd3d_shader_descriptor_info1 ++{ ++ enum vkd3d_shader_descriptor_type type; ++ unsigned int register_space; ++ unsigned int register_index; ++ unsigned int register_id; ++ enum vkd3d_shader_resource_type resource_type; ++ enum vkd3d_shader_resource_data_type resource_data_type; ++ unsigned int flags; ++ unsigned int count; ++}; ++ ++struct vkd3d_shader_scan_descriptor_info1 ++{ ++ struct vkd3d_shader_descriptor_info1 *descriptors; ++ unsigned int descriptor_count; ++}; ++ + void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version); + +@@ -1167,6 +1232,8 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); + int vkd3d_shader_sm4_parser_create(const struct 
vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); ++int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); + + void free_shader_desc(struct vkd3d_shader_desc *desc); + +@@ -1186,7 +1253,7 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); + #define SPIRV_MAX_SRC_COUNT 6 + + int spirv_compile(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + +@@ -1339,6 +1406,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, + } + + #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) ++#define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) + + #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') + #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') +@@ -1369,11 +1437,7 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void + void dxbc_writer_init(struct dxbc_writer *dxbc); + int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); + +-enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *instructions); +-enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( +- struct vkd3d_shader_instruction_array *instructions, const struct shader_signature *input_signature); +-enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, +- enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, +- struct shader_signature *output_signature, struct shader_signature 
*patch_constant_signature); ++enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, ++ const struct vkd3d_shader_compile_info *compile_info); + + #endif /* __VKD3D_SHADER_PRIVATE_H */ +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c +index 32439eec7eb..8b5f7899cf3 100644 +--- a/libs/vkd3d/libs/vkd3d/command.c ++++ b/libs/vkd3d/libs/vkd3d/command.c +@@ -26,6 +26,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF + static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); + static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, + struct d3d12_fence *fence, uint64_t value); ++static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue); + static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); + static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any); + +@@ -453,9 +454,9 @@ static const struct d3d12_root_parameter *root_signature_get_root_descriptor( + } + + /* ID3D12Fence */ +-static struct d3d12_fence *impl_from_ID3D12Fence(ID3D12Fence *iface) ++static struct d3d12_fence *impl_from_ID3D12Fence1(ID3D12Fence1 *iface) + { +- return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence_iface); ++ return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence1_iface); + } + + static VkResult d3d12_fence_create_vk_fence(struct d3d12_fence *fence, VkFence *vk_fence) +@@ -899,18 +900,19 @@ static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uin + vkd3d_mutex_unlock(&fence->mutex); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence1 *iface, + REFIID riid, void **object) + { + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + +- if 
(IsEqualGUID(riid, &IID_ID3D12Fence) ++ if (IsEqualGUID(riid, &IID_ID3D12Fence1) ++ || IsEqualGUID(riid, &IID_ID3D12Fence) + || IsEqualGUID(riid, &IID_ID3D12Pageable) + || IsEqualGUID(riid, &IID_ID3D12DeviceChild) + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { +- ID3D12Fence_AddRef(iface); ++ ID3D12Fence1_AddRef(iface); + *object = iface; + return S_OK; + } +@@ -921,9 +923,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, + return E_NOINTERFACE; + } + +-static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence *iface) ++static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence1 *iface) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + ULONG refcount = InterlockedIncrement(&fence->refcount); + + TRACE("%p increasing refcount to %u.\n", fence, refcount); +@@ -936,9 +938,9 @@ static void d3d12_fence_incref(struct d3d12_fence *fence) + InterlockedIncrement(&fence->internal_refcount); + } + +-static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence *iface) ++static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence1 *iface) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + ULONG refcount = InterlockedDecrement(&fence->refcount); + + TRACE("%p decreasing refcount to %u.\n", fence, refcount); +@@ -971,10 +973,10 @@ static void d3d12_fence_decref(struct d3d12_fence *fence) + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence1 *iface, + REFGUID guid, UINT *data_size, void *data) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -982,10 
+984,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, + return vkd3d_get_private_data(&fence->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence1 *iface, + REFGUID guid, UINT data_size, const void *data) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -993,37 +995,37 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, + return vkd3d_set_private_data(&fence->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence1 *iface, + REFGUID guid, const IUnknown *data) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&fence->private_store, guid, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence *iface, const WCHAR *name) ++static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence1 *iface, const WCHAR *name) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, fence->device->wchar_size)); + + return name ? 
S_OK : E_INVALIDARG; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence *iface, REFIID iid, void **device) ++static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence1 *iface, REFIID iid, void **device) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(fence->device, iid, device); + } + +-static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface) ++static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence1 *iface) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + uint64_t completed_value; + + TRACE("iface %p.\n", iface); +@@ -1034,10 +1036,10 @@ static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface + return completed_value; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence1 *iface, + UINT64 value, HANDLE event) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + unsigned int i; + bool latch = false; + +@@ -1105,9 +1107,9 @@ static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fen + return d3d12_device_flush_blocked_queues(fence->device); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value) ++static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence1 *iface, UINT64 value) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p, value %#"PRIx64".\n", iface, value); + +@@ -1116,7 +1118,16 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_fence_Signal(ID3D12Fence *iface, UINT64 v + return d3d12_fence_signal(fence, value, VK_NULL_HANDLE, true); + } + +-static const struct ID3D12FenceVtbl d3d12_fence_vtbl = ++static D3D12_FENCE_FLAGS STDMETHODCALLTYPE d3d12_fence_GetCreationFlags(ID3D12Fence1 *iface) ++{ ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); ++ ++ TRACE("iface %p.\n", iface); ++ ++ return fence->flags; ++} ++ ++static const struct ID3D12Fence1Vtbl d3d12_fence_vtbl = + { + /* IUnknown methods */ + d3d12_fence_QueryInterface, +@@ -1133,14 +1144,18 @@ static const struct ID3D12FenceVtbl d3d12_fence_vtbl = + d3d12_fence_GetCompletedValue, + d3d12_fence_SetEventOnCompletion, + d3d12_fence_Signal, ++ /* ID3D12Fence1 methods */ ++ d3d12_fence_GetCreationFlags, + }; + + static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) + { +- if (!iface) ++ ID3D12Fence1 *iface1; ++ ++ if (!(iface1 = (ID3D12Fence1 *)iface)) + return NULL; +- assert(iface->lpVtbl == &d3d12_fence_vtbl); +- return impl_from_ID3D12Fence(iface); ++ assert(iface1->lpVtbl == &d3d12_fence_vtbl); ++ return impl_from_ID3D12Fence1(iface1); + } + + static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device, +@@ -1150,7 +1165,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * + VkResult vr; + HRESULT hr; + +- fence->ID3D12Fence_iface.lpVtbl = &d3d12_fence_vtbl; ++ fence->ID3D12Fence1_iface.lpVtbl = &d3d12_fence_vtbl; + fence->internal_refcount = 1; + fence->refcount = 1; + +@@ -1161,7 +1176,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * + + vkd3d_cond_init(&fence->null_event_cond); + +- if (flags) ++ if ((fence->flags = flags)) + FIXME("Ignoring flags %#x.\n", flags); + + fence->events = NULL; +@@ -1315,32 +1330,26 @@ static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_comm + return hr; + } + +- allocator->current_command_list = list; +- +- return S_OK; +-} +- +-static void 
d3d12_command_allocator_free_command_buffer(struct d3d12_command_allocator *allocator, +- struct d3d12_command_list *list) +-{ +- struct d3d12_device *device = allocator->device; +- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; +- +- TRACE("allocator %p, list %p.\n", allocator, list); +- +- if (allocator->current_command_list == list) +- allocator->current_command_list = NULL; +- + if (!vkd3d_array_reserve((void **)&allocator->command_buffers, &allocator->command_buffers_size, + allocator->command_buffer_count + 1, sizeof(*allocator->command_buffers))) + { + WARN("Failed to add command buffer.\n"); + VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool, + 1, &list->vk_command_buffer)); +- return; ++ return E_OUTOFMEMORY; + } +- + allocator->command_buffers[allocator->command_buffer_count++] = list->vk_command_buffer; ++ ++ allocator->current_command_list = list; ++ ++ return S_OK; ++} ++ ++static void d3d12_command_allocator_remove_command_list(struct d3d12_command_allocator *allocator, ++ const struct d3d12_command_list *list) ++{ ++ if (allocator->current_command_list == list) ++ allocator->current_command_list = NULL; + } + + static bool d3d12_command_allocator_add_render_pass(struct d3d12_command_allocator *allocator, VkRenderPass pass) +@@ -1910,10 +1919,32 @@ HRESULT d3d12_command_allocator_create(struct d3d12_device *device, + return S_OK; + } + ++static void d3d12_command_signature_incref(struct d3d12_command_signature *signature) ++{ ++ vkd3d_atomic_increment(&signature->internal_refcount); ++} ++ ++static void d3d12_command_signature_decref(struct d3d12_command_signature *signature) ++{ ++ unsigned int refcount = vkd3d_atomic_decrement(&signature->internal_refcount); ++ ++ if (!refcount) ++ { ++ struct d3d12_device *device = signature->device; ++ ++ vkd3d_private_store_destroy(&signature->private_store); ++ ++ vkd3d_free((void *)signature->desc.pArgumentDescs); ++ vkd3d_free(signature); ++ ++ 
d3d12_device_release(device); ++ } ++} ++ + /* ID3D12CommandList */ +-static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList2(ID3D12GraphicsCommandList2 *iface) ++static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList3(ID3D12GraphicsCommandList3 *iface) + { +- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); ++ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); + } + + static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) +@@ -2259,12 +2290,13 @@ static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *l + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList2 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList3 *iface, + REFIID iid, void **object) + { + TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); + +- if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) ++ if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3) ++ || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList1) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList) + || IsEqualGUID(iid, &IID_ID3D12CommandList) +@@ -2272,7 +2304,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic + || IsEqualGUID(iid, &IID_ID3D12Object) + || IsEqualGUID(iid, &IID_IUnknown)) + { +- ID3D12GraphicsCommandList2_AddRef(iface); ++ ID3D12GraphicsCommandList3_AddRef(iface); + *object = iface; + return S_OK; + } +@@ -2283,9 +2315,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic + return E_NOINTERFACE; + } + +-static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList2 *iface) ++static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList3 *iface) + 
{ +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + ULONG refcount = InterlockedIncrement(&list->refcount); + + TRACE("%p increasing refcount to %u.\n", list, refcount); +@@ -2298,9 +2330,9 @@ static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bind + vkd3d_free(bindings->vk_uav_counter_views); + } + +-static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList2 *iface) ++static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList3 *iface) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + ULONG refcount = InterlockedDecrement(&list->refcount); + + TRACE("%p decreasing refcount to %u.\n", list, refcount); +@@ -2313,7 +2345,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL + + /* When command pool is destroyed, all command buffers are implicitly freed. 
*/ + if (list->allocator) +- d3d12_command_allocator_free_command_buffer(list->allocator, list); ++ d3d12_command_allocator_remove_command_list(list->allocator, list); + + vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE]); + vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS]); +@@ -2326,66 +2358,66 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL + return refcount; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList2 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList3 *iface, + REFGUID guid, UINT *data_size, void *data) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&list->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList2 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList3 *iface, + REFGUID guid, UINT data_size, const void *data) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&list->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList2 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList3 *iface, + REFGUID guid, const IUnknown *data) + { +- struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&list->private_store, guid, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList2 *iface, const WCHAR *name) ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList3 *iface, const WCHAR *name) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size)); + + return name ? S_OK : E_INVALIDARG; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList2 *iface, REFIID iid, void **device) ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList3 *iface, REFIID iid, void **device) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(list->device, iid, device); + } + +-static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList2 *iface) ++static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList3 *iface) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p.\n", iface); + + return list->type; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList2 *iface) ++static HRESULT 
STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList3 *iface) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + VkResult vr; + +@@ -2411,7 +2443,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL + + if (list->allocator) + { +- d3d12_command_allocator_free_command_buffer(list->allocator, list); ++ d3d12_command_allocator_remove_command_list(list->allocator, list); + list->allocator = NULL; + } + +@@ -2429,7 +2461,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL + static void d3d12_command_list_reset_state(struct d3d12_command_list *list, + ID3D12PipelineState *initial_pipeline_state) + { +- ID3D12GraphicsCommandList2 *iface = &list->ID3D12GraphicsCommandList2_iface; ++ ID3D12GraphicsCommandList3 *iface = &list->ID3D12GraphicsCommandList3_iface; + + memset(list->strides, 0, sizeof(list->strides)); + list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; +@@ -2465,14 +2497,14 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, + + list->descriptor_heap_count = 0; + +- ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); ++ ID3D12GraphicsCommandList3_SetPipelineState(iface, initial_pipeline_state); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList2 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList3 *iface, + ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) + { + struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + HRESULT hr; + + 
TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", +@@ -2499,7 +2531,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandL + return hr; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList3 *iface, + ID3D12PipelineState *pipeline_state) + { + FIXME("iface %p, pipline_state %p stub!\n", iface, pipeline_state); +@@ -3185,6 +3217,20 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) + } + } + ++static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) ++{ ++ if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) ++ { ++ if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) ++ { ++ /* Descriptors can be written after binding. */ ++ FIXME("Flushing descriptor updates while list %p is not closed.\n", list); ++ command_list_flush_vk_heap_updates(list); ++ } ++ list->descriptor_heaps[list->descriptor_heap_count++] = heap; ++ } ++} ++ + static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) + { +@@ -3209,18 +3255,6 @@ static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l + bindings->sampler_heap_id = heap->serial_id; + } + +- if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) +- { +- if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) +- { +- /* Descriptors can be written after binding. 
*/ +- FIXME("Flushing descriptor updates while list %p is not closed.\n", list); +- command_list_flush_vk_heap_updates(list); +- list->descriptor_heap_count = 0; +- } +- list->descriptor_heaps[list->descriptor_heap_count++] = heap; +- } +- + vkd3d_mutex_lock(&heap->vk_sets_mutex); + + for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) +@@ -3353,11 +3387,11 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList3 *iface, + UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, + UINT start_instance_location) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " +@@ -3377,11 +3411,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCom + instance_count, start_vertex_location, start_instance_location)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList3 *iface, + UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, + INT base_vertex_location, UINT start_instance_location) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " +@@ -3403,10 +3437,10 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_DrawIndexedInstanced(ID3D12Grap + instance_count, start_vertex_location, base_vertex_location, start_instance_location)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList3 *iface, + UINT x, UINT y, UINT z) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); +@@ -3422,10 +3456,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandL + VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_vk_device_procs *vk_procs; + VkBufferCopy buffer_copy; +@@ -3624,7 +3658,7 @@ static HRESULT d3d12_command_list_allocate_transfer_buffer(struct d3d12_command_ + static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_command_list *list, + struct d3d12_resource *dst_resource, unsigned int dst_sub_resource_idx, + const struct vkd3d_format *dst_format, struct d3d12_resource *src_resource, +- unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format) ++ unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format, unsigned int layer_count) + { + const struct vkd3d_vk_device_procs 
*vk_procs = &list->device->vk_procs; + const D3D12_RESOURCE_DESC *dst_desc = &dst_resource->desc; +@@ -3651,6 +3685,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com + buffer_image_copy.bufferImageHeight = 0; + vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, + src_format, src_sub_resource_idx, src_desc->MipLevels); ++ buffer_image_copy.imageSubresource.layerCount = layer_count; + src_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; + buffer_image_copy.imageOffset.x = 0; + buffer_image_copy.imageOffset.y = 0; +@@ -3658,7 +3693,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com + vk_extent_3d_from_d3d12_miplevel(&buffer_image_copy.imageExtent, src_desc, src_miplevel_idx); + + buffer_size = src_format->byte_count * buffer_image_copy.imageExtent.width * +- buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth; ++ buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth * layer_count; + if (FAILED(hr = d3d12_command_list_allocate_transfer_buffer(list, buffer_size, &transfer_buffer))) + { + ERR("Failed to allocate transfer buffer, hr %#x.\n", hr); +@@ -3684,6 +3719,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com + + vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, + dst_format, dst_sub_resource_idx, dst_desc->MipLevels); ++ buffer_image_copy.imageSubresource.layerCount = layer_count; + dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; + + assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == +@@ -3705,11 +3741,11 @@ static bool validate_d3d12_box(const D3D12_BOX *box) + && box->back > box->front; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList3 *iface, + const 
D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, + const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_format *src_format, *dst_format; + const struct vkd3d_vk_device_procs *vk_procs; +@@ -3813,7 +3849,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic + { + d3d12_command_list_copy_incompatible_texture_region(list, + dst_resource, dst->u.SubresourceIndex, dst_format, +- src_resource, src->u.SubresourceIndex, src_format); ++ src_resource, src->u.SubresourceIndex, src_format, 1); + return; + } + +@@ -3830,11 +3866,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst, ID3D12Resource *src) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *dst_resource, *src_resource; ++ const struct vkd3d_format *dst_format, *src_format; + const struct vkd3d_vk_device_procs *vk_procs; + VkBufferCopy vk_buffer_copy; + VkImageCopy vk_image_copy; +@@ -3867,16 +3904,29 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm + else + { + layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc); ++ dst_format = dst_resource->format; ++ src_format = src_resource->format; + + assert(d3d12_resource_is_texture(dst_resource)); + assert(d3d12_resource_is_texture(src_resource)); + assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); + 
assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); + ++ if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) ++ { ++ for (i = 0; i < dst_resource->desc.MipLevels; ++i) ++ { ++ d3d12_command_list_copy_incompatible_texture_region(list, ++ dst_resource, i, dst_format, ++ src_resource, i, src_format, layer_count); ++ } ++ return; ++ } ++ + for (i = 0; i < dst_resource->desc.MipLevels; ++i) + { + vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, +- src_resource->format, dst_resource->format, NULL, 0, 0, 0); ++ src_format, dst_format, NULL, 0, 0, 0); + vk_image_copy.dstSubresource.layerCount = layer_count; + vk_image_copy.srcSubresource.layerCount = layer_count; + VK_CALL(vkCmdCopyImage(list->vk_command_buffer, src_resource->u.vk_image, +@@ -3886,7 +3936,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, + const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, + D3D12_TILE_COPY_FLAGS flags) +@@ -3897,11 +3947,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommand + buffer, buffer_offset, flags); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst, UINT dst_sub_resource_idx, + ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + 
const struct vkd3d_format *src_format, *dst_format, *vk_format; + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_vk_device_procs *vk_procs; +@@ -3964,10 +4014,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList3 *iface, + D3D12_PRIMITIVE_TOPOLOGY topology) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, topology %#x.\n", iface, topology); + +@@ -3978,11 +4028,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12Gr + d3d12_command_list_invalidate_current_pipeline(list); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList3 *iface, + UINT viewport_count, const D3D12_VIEWPORT *viewports) + { + VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; + +@@ -4016,10 +4066,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo + VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList3 *iface, + UINT rect_count, 
const D3D12_RECT *rects) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; +@@ -4044,10 +4094,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12Graphic + VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList3 *iface, + const FLOAT blend_factor[4]) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); +@@ -4056,10 +4106,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12Graphics + VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList3 *iface, + UINT stencil_ref) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); +@@ -4068,11 +4118,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsC + VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, stencil_ref)); + } + +-static void STDMETHODCALLTYPE 
d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList3 *iface, + ID3D12PipelineState *pipeline_state) + { + struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); + +@@ -4123,10 +4173,10 @@ static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BA + return 0; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList3 *iface, + UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + bool have_aliasing_barriers = false, have_split_barriers = false; + const struct vkd3d_vk_device_procs *vk_procs; + const struct vkd3d_vulkan_info *vk_info; +@@ -4349,13 +4399,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC + WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList3 *iface, + ID3D12GraphicsCommandList *command_list) + { + FIXME("iface %p, command_list %p stub!\n", iface, command_list); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList3 *iface, + UINT 
heap_count, ID3D12DescriptorHeap *const *heaps) + { + TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); +@@ -4381,10 +4431,10 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis + d3d12_command_list_invalidate_root_parameters(list, bind_point); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList3 *iface, + ID3D12RootSignature *root_signature) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_signature %p.\n", iface, root_signature); + +@@ -4392,10 +4442,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12G + unsafe_impl_from_ID3D12RootSignature(root_signature)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList3 *iface, + ID3D12RootSignature *root_signature) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_signature %p.\n", iface, root_signature); + +@@ -4408,6 +4458,7 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l + { + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct d3d12_root_signature *root_signature = bindings->root_signature; ++ struct d3d12_descriptor_heap *descriptor_heap; + struct d3d12_desc *desc; + + assert(root_signature_get_descriptor_table(root_signature, index)); +@@ -4418,15 +4469,25 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l + 
if (bindings->descriptor_tables[index] == desc) + return; + ++ descriptor_heap = d3d12_desc_get_descriptor_heap(desc); ++ if (!(descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) ++ { ++ /* GetGPUDescriptorHandleForHeapStart() returns a null handle in this case, ++ * but a CPU handle could be passed. */ ++ WARN("Descriptor heap %p is not shader visible.\n", descriptor_heap); ++ return; ++ } ++ command_list_add_descriptor_heap(list, descriptor_heap); ++ + bindings->descriptor_tables[index] = desc; + bindings->descriptor_table_dirty_mask |= (uint64_t)1 << index; + bindings->descriptor_table_active_mask |= (uint64_t)1 << index; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", + iface, root_parameter_index, base_descriptor.ptr); +@@ -4435,10 +4496,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(I + root_parameter_index, base_descriptor); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", + iface, 
root_parameter_index, base_descriptor.ptr); +@@ -4460,10 +4521,10 @@ static void d3d12_command_list_set_root_constants(struct d3d12_command_list *lis + c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, UINT data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", + iface, root_parameter_index, data, dst_offset); +@@ -4472,10 +4533,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3 + root_parameter_index, dst_offset, 1, &data); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, UINT data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", + iface, root_parameter_index, data, dst_offset); +@@ -4484,10 +4545,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID + root_parameter_index, dst_offset, 1, &data); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, UINT 
constant_count, const void *data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", + iface, root_parameter_index, constant_count, data, dst_offset); +@@ -4496,10 +4557,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID + root_parameter_index, dst_offset, constant_count, data); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", + iface, root_parameter_index, constant_count, data, dst_offset); +@@ -4561,9 +4622,9 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4572,9 +4633,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferVie + } + + static void 
STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4633,9 +4694,9 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4645,9 +4706,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceVie + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4657,9 +4718,9 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_SetGraphicsRootShaderResourceVi + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4669,9 +4730,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessVi + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4680,10 +4741,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessV + root_parameter_index, address); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList3 *iface, + const D3D12_INDEX_BUFFER_VIEW *view) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + struct d3d12_resource *resource; + enum VkIndexType index_type; +@@ -4723,10 +4784,10 @@ 
static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics + view->BufferLocation - resource->gpu_address, index_type)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList3 *iface, + UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_null_resources *null_resources; + struct vkd3d_gpu_va_allocator *gpu_va_allocator; + VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; +@@ -4781,10 +4842,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi + d3d12_command_list_invalidate_current_pipeline(list); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList3 *iface, + UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; + VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; + VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; +@@ -4846,11 +4907,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsComm + VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList3 *iface, + UINT 
render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, + BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct d3d12_rtv_desc *rtv_desc; + const struct d3d12_dsv_desc *dsv_desc; + VkFormat prev_dsv_format; +@@ -5051,12 +5112,12 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList3 *iface, + D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, + UINT rect_count, const D3D12_RECT *rects) + { + const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); + struct VkAttachmentDescription attachment_desc; + struct VkAttachmentReference ds_reference; +@@ -5100,10 +5161,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12Gra + &clear_value, rect_count, rects); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList3 *iface, + D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct d3d12_rtv_desc 
*rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); + struct VkAttachmentDescription attachment_desc; + struct VkAttachmentReference color_reference; +@@ -5348,11 +5409,11 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList3 *iface, + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, + const UINT values[4], UINT rect_count, const D3D12_RECT *rects) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct vkd3d_view *descriptor, *uint_view = NULL; + struct d3d12_device *device = list->device; + struct vkd3d_texture_view_desc view_desc; +@@ -5414,11 +5475,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID + vkd3d_view_decref(uint_view, device); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList3 *iface, + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, + const float values[4], UINT rect_count, const D3D12_RECT *rects) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *resource_impl; + VkClearColorValue colour; + struct vkd3d_view *view; +@@ -5434,16 +5495,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I + d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); + } + 
+-static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) + { + FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList3 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); + const struct vkd3d_vk_device_procs *vk_procs; + VkQueryControlFlags flags = 0; +@@ -5470,10 +5531,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsComman + VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList3 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); + const struct vkd3d_vk_device_procs *vk_procs; + +@@ -5515,12 +5576,12 @@ static size_t get_query_stride(D3D12_QUERY_TYPE type) + return sizeof(uint64_t); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE 
d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList3 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, + ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) + { + const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i, first, count; +@@ -5596,10 +5657,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12Graphics + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); + const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; + const struct vkd3d_vk_device_procs *vk_procs; +@@ -5668,19 +5729,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCo + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList3 *iface, + UINT metadata, const void *data, UINT size) + { + FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE 
d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList3 *iface, + UINT metadata, const void *data, UINT size) + { + FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList2 *iface) ++static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList3 *iface) + { + FIXME("iface %p stub!\n", iface); + } +@@ -5689,14 +5750,14 @@ STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMEN + STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); + STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS)); + +-static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList3 *iface, + ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, + UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) + { + struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); + struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); + struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; +@@ -5714,6 +5775,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC + return; + } + ++ d3d12_command_signature_incref(sig_impl); ++ + signature_desc = &sig_impl->desc; + for (i = 0; i < signature_desc->NumArgumentDescs; ++i) + { +@@ -5776,6 +5839,7 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC + if (!d3d12_command_list_update_compute_state(list)) + { + WARN("Failed to update compute state, ignoring dispatch.\n"); ++ d3d12_command_signature_decref(sig_impl); + return; + } + +@@ -5788,9 +5852,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC + break; + } + } ++ ++ d3d12_command_signature_decref(sig_impl); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst_buffer, UINT64 dst_offset, + ID3D12Resource *src_buffer, UINT64 src_offset, + UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, +@@ -5803,7 +5869,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12Grap + dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst_buffer, UINT64 dst_offset, + ID3D12Resource *src_buffer, UINT64 src_offset, + UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, +@@ -5816,20 +5882,20 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr + dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList3 *iface, + FLOAT min, FLOAT max) + { + FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList2 *iface, ++static 
void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList3 *iface, + UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) + { + FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", + iface, sample_count, pixel_count, sample_positions); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, + ID3D12Resource *src_resource, UINT src_sub_resource_idx, + D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) +@@ -5841,16 +5907,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12 + src_resource, src_sub_resource_idx, src_rect, format, mode); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList2 *iface, UINT mask) ++static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList3 *iface, UINT mask) + { + FIXME("iface %p, mask %#x stub!\n", iface, mask); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList3 *iface, + UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, + const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *resource; + unsigned int i; + +@@ -5863,7 +5929,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12Grap + } + } + +-static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = ++static void 
STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList3 *iface, ++ ID3D12ProtectedResourceSession *protected_session) ++{ ++ FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); ++} ++ ++static const struct ID3D12GraphicsCommandList3Vtbl d3d12_command_list_vtbl = + { + /* IUnknown methods */ + d3d12_command_list_QueryInterface, +@@ -5939,6 +6011,8 @@ static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = + d3d12_command_list_SetViewInstanceMask, + /* ID3D12GraphicsCommandList2 methods */ + d3d12_command_list_WriteBufferImmediate, ++ /* ID3D12GraphicsCommandList3 methods */ ++ d3d12_command_list_SetProtectedResourceSession, + }; + + static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) +@@ -5946,7 +6020,7 @@ static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12Comma + if (!iface) + return NULL; + assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); +- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); ++ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); + } + + static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, +@@ -5955,7 +6029,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d + { + HRESULT hr; + +- list->ID3D12GraphicsCommandList2_iface.lpVtbl = &d3d12_command_list_vtbl; ++ list->ID3D12GraphicsCommandList3_iface.lpVtbl = &d3d12_command_list_vtbl; + list->refcount = 1; + + list->type = type; +@@ -6063,8 +6137,35 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if + return refcount; + } + ++static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) ++{ ++ switch (op->opcode) ++ { ++ case VKD3D_CS_OP_WAIT: ++ d3d12_fence_decref(op->u.wait.fence); ++ break; ++ ++ case 
VKD3D_CS_OP_SIGNAL: ++ d3d12_fence_decref(op->u.signal.fence); ++ break; ++ ++ case VKD3D_CS_OP_EXECUTE: ++ vkd3d_free(op->u.execute.buffers); ++ break; ++ ++ case VKD3D_CS_OP_UPDATE_MAPPINGS: ++ case VKD3D_CS_OP_COPY_MAPPINGS: ++ break; ++ } ++} ++ + static void d3d12_command_queue_op_array_destroy(struct d3d12_command_queue_op_array *array) + { ++ unsigned int i; ++ ++ for (i = 0; i < array->count; ++i) ++ d3d12_command_queue_destroy_op(&array->ops[i]); ++ + vkd3d_free(array->ops); + } + +@@ -6162,17 +6263,131 @@ static struct vkd3d_cs_op_data *d3d12_command_queue_op_array_require_space(struc + return &array->ops[array->count++]; + } + ++static bool clone_array_parameter(void **dst, const void *src, size_t elem_size, unsigned int count) ++{ ++ void *buffer; ++ ++ *dst = NULL; ++ if (src) ++ { ++ if (!(buffer = vkd3d_calloc(count, elem_size))) ++ return false; ++ memcpy(buffer, src, count * elem_size); ++ *dst = buffer; ++ } ++ return true; ++} ++ ++static void update_mappings_cleanup(struct vkd3d_cs_update_mappings *update_mappings) ++{ ++ vkd3d_free(update_mappings->region_start_coordinates); ++ vkd3d_free(update_mappings->region_sizes); ++ vkd3d_free(update_mappings->range_flags); ++ vkd3d_free(update_mappings->heap_range_offsets); ++ vkd3d_free(update_mappings->range_tile_counts); ++} ++ + static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, + ID3D12Resource *resource, UINT region_count, + const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, const D3D12_TILE_REGION_SIZE *region_sizes, + ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, +- UINT *heap_range_offsets, UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) ++ const UINT *heap_range_offsets, const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) + { +- FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, " ++ struct d3d12_resource *resource_impl = 
unsafe_impl_from_ID3D12Resource(resource); ++ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ++ struct d3d12_heap *heap_impl = unsafe_impl_from_ID3D12Heap(heap); ++ struct vkd3d_cs_update_mappings update_mappings = {0}; ++ struct vkd3d_cs_op_data *op; ++ ++ TRACE("iface %p, resource %p, region_count %u, region_start_coordinates %p, " + "region_sizes %p, heap %p, range_count %u, range_flags %p, heap_range_offsets %p, " +- "range_tile_counts %p, flags %#x stub!\n", ++ "range_tile_counts %p, flags %#x.\n", + iface, resource, region_count, region_start_coordinates, region_sizes, heap, range_count, + range_flags, heap_range_offsets, range_tile_counts, flags); ++ ++ if (!region_count || !range_count) ++ return; ++ ++ if (!command_queue->supports_sparse_binding) ++ { ++ FIXME("Command queue %p does not support sparse binding.\n", command_queue); ++ return; ++ } ++ ++ if (!resource_impl->tiles.subresource_count) ++ { ++ WARN("Resource %p is not a tiled resource.\n", resource_impl); ++ return; ++ } ++ ++ if (region_count > 1 && !region_start_coordinates) ++ { ++ WARN("Region start coordinates must not be NULL when region count is > 1.\n"); ++ return; ++ } ++ ++ if (range_count > 1 && !range_tile_counts) ++ { ++ WARN("Range tile counts must not be NULL when range count is > 1.\n"); ++ return; ++ } ++ ++ update_mappings.resource = resource_impl; ++ update_mappings.heap = heap_impl; ++ if (!clone_array_parameter((void **)&update_mappings.region_start_coordinates, ++ region_start_coordinates, sizeof(*region_start_coordinates), region_count)) ++ { ++ ERR("Failed to allocate region start coordinates.\n"); ++ return; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.region_sizes, ++ region_sizes, sizeof(*region_sizes), region_count)) ++ { ++ ERR("Failed to allocate region sizes.\n"); ++ goto free_clones; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.range_flags, ++ range_flags, sizeof(*range_flags), range_count)) ++ { ++ 
ERR("Failed to allocate range flags.\n"); ++ goto free_clones; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.heap_range_offsets, ++ heap_range_offsets, sizeof(*heap_range_offsets), range_count)) ++ { ++ ERR("Failed to allocate heap range offsets.\n"); ++ goto free_clones; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.range_tile_counts, ++ range_tile_counts, sizeof(*range_tile_counts), range_count)) ++ { ++ ERR("Failed to allocate range tile counts.\n"); ++ goto free_clones; ++ } ++ update_mappings.region_count = region_count; ++ update_mappings.range_count = range_count; ++ update_mappings.flags = flags; ++ ++ vkd3d_mutex_lock(&command_queue->op_mutex); ++ ++ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) ++ { ++ ERR("Failed to add op.\n"); ++ goto unlock_mutex; ++ } ++ ++ op->opcode = VKD3D_CS_OP_UPDATE_MAPPINGS; ++ op->u.update_mappings = update_mappings; ++ ++ d3d12_command_queue_submit_locked(command_queue); ++ ++ vkd3d_mutex_unlock(&command_queue->op_mutex); ++ return; ++ ++unlock_mutex: ++ vkd3d_mutex_unlock(&command_queue->op_mutex); ++free_clones: ++ update_mappings_cleanup(&update_mappings); + } + + static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, +@@ -6183,10 +6398,34 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command + const D3D12_TILE_REGION_SIZE *region_size, + D3D12_TILE_MAPPING_FLAGS flags) + { +- FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, " +- "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x stub!\n", ++ struct d3d12_resource *dst_resource_impl = impl_from_ID3D12Resource(dst_resource); ++ struct d3d12_resource *src_resource_impl = impl_from_ID3D12Resource(src_resource); ++ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ++ struct vkd3d_cs_op_data *op; ++ ++ TRACE("iface %p, dst_resource %p, dst_region_start_coordinate %p, " ++ 
"src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x.\n", + iface, dst_resource, dst_region_start_coordinate, src_resource, + src_region_start_coordinate, region_size, flags); ++ ++ vkd3d_mutex_lock(&command_queue->op_mutex); ++ ++ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) ++ { ++ ERR("Failed to add op.\n"); ++ return; ++ } ++ op->opcode = VKD3D_CS_OP_COPY_MAPPINGS; ++ op->u.copy_mappings.dst_resource = dst_resource_impl; ++ op->u.copy_mappings.src_resource = src_resource_impl; ++ op->u.copy_mappings.dst_region_start_coordinate = *dst_region_start_coordinate; ++ op->u.copy_mappings.src_region_start_coordinate = *src_region_start_coordinate; ++ op->u.copy_mappings.region_size = *region_size; ++ op->u.copy_mappings.flags = flags; ++ ++ d3d12_command_queue_submit_locked(command_queue); ++ ++ vkd3d_mutex_unlock(&command_queue->op_mutex); + } + + static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, +@@ -6214,8 +6453,6 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu + ERR("Failed to submit queue(s), vr %d.\n", vr); + + vkd3d_queue_release(vkd3d_queue); +- +- vkd3d_free(buffers); + } + + static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue) +@@ -6273,7 +6510,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { + ERR("Failed to add op.\n"); +- return; ++ goto done; + } + op->opcode = VKD3D_CS_OP_EXECUTE; + op->u.execute.buffers = buffers; +@@ -6281,6 +6518,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm + + d3d12_command_queue_submit_locked(command_queue); + ++done: + vkd3d_mutex_unlock(&command_queue->op_mutex); + return; + } +@@ -6348,6 +6586,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * + + if (!(op = 
d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { ++ ERR("Failed to add op.\n"); + hr = E_OUTOFMEMORY; + goto done; + } +@@ -6686,6 +6925,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if + + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { ++ ERR("Failed to add op.\n"); + hr = E_OUTOFMEMORY; + goto done; + } +@@ -6922,22 +7162,31 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * + return d3d12_command_queue_fixup_after_flush_locked(queue); + } + d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); +- d3d12_fence_decref(fence); + break; + + case VKD3D_CS_OP_SIGNAL: + d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); +- d3d12_fence_decref(op->u.signal.fence); + break; + + case VKD3D_CS_OP_EXECUTE: + d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); + break; + ++ case VKD3D_CS_OP_UPDATE_MAPPINGS: ++ FIXME("Tiled resource binding is not supported yet.\n"); ++ update_mappings_cleanup(&op->u.update_mappings); ++ break; ++ ++ case VKD3D_CS_OP_COPY_MAPPINGS: ++ FIXME("Tiled resource mapping copying is not supported yet.\n"); ++ break; ++ + default: + vkd3d_unreachable(); + } + ++ d3d12_command_queue_destroy_op(op); ++ + *flushed_any |= true; + } + +@@ -7000,6 +7249,8 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, + if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) + goto fail_destroy_op_mutex; + ++ queue->supports_sparse_binding = !!(queue->vkd3d_queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT); ++ + d3d12_device_add_ref(queue->device = device); + + return S_OK; +@@ -7105,16 +7356,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_signature_Release(ID3D12CommandSign + TRACE("%p decreasing refcount to %u.\n", signature, refcount); + + if (!refcount) +- { +- struct d3d12_device *device = 
signature->device; +- +- vkd3d_private_store_destroy(&signature->private_store); +- +- vkd3d_free((void *)signature->desc.pArgumentDescs); +- vkd3d_free(signature); +- +- d3d12_device_release(device); +- } ++ d3d12_command_signature_decref(signature); + + return refcount; + } +@@ -7221,6 +7463,7 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, const D3D12_ + + object->ID3D12CommandSignature_iface.lpVtbl = &d3d12_command_signature_vtbl; + object->refcount = 1; ++ object->internal_refcount = 1; + + object->desc = *desc; + if (!(object->desc.pArgumentDescs = vkd3d_calloc(desc->NumArgumentDescs, sizeof(*desc->pArgumentDescs)))) +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index 4263dcf4184..a2e1f13dec3 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -1464,6 +1464,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + + vulkan_info->device_limits = physical_device_info->properties2.properties.limits; + vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; ++ vulkan_info->sparse_binding = features->sparseBinding; ++ vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; + vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; + vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; + vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; +@@ -2655,8 +2657,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *if + initial_pipeline_state, &object))) + return hr; + +- return return_interface(&object->ID3D12GraphicsCommandList2_iface, +- &IID_ID3D12GraphicsCommandList2, riid, command_list); ++ return return_interface(&object->ID3D12GraphicsCommandList3_iface, ++ &IID_ID3D12GraphicsCommandList3, riid, command_list); + } + + /* Direct3D 
feature levels restrict which formats can be optionally supported. */ +@@ -3412,6 +3414,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, + struct d3d12_device *device = impl_from_ID3D12Device(iface); + unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; + unsigned int dst_range_size, src_range_size; ++ struct d3d12_descriptor_heap *dst_heap; + const struct d3d12_desc *src; + struct d3d12_desc *dst; + +@@ -3441,13 +3444,14 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, + src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1; + + dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); ++ dst_heap = d3d12_desc_get_descriptor_heap(dst); + src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); + + for (; dst_idx < dst_range_size && src_idx < src_range_size; ++dst_idx, ++src_idx) + { + if (dst[dst_idx].s.u.object == src[src_idx].s.u.object) + continue; +- d3d12_desc_copy(&dst[dst_idx], &src[src_idx], device); ++ d3d12_desc_copy(&dst[dst_idx], &src[src_idx], dst_heap, device); + } + + if (dst_idx >= dst_range_size) +@@ -3745,7 +3749,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, + if (FAILED(hr = d3d12_fence_create(device, initial_value, flags, &object))) + return hr; + +- return return_interface(&object->ID3D12Fence_iface, &IID_ID3D12Fence, riid, fence); ++ return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); + } + + static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device *iface) +@@ -3889,12 +3893,18 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device *iface + UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, + D3D12_SUBRESOURCE_TILING *sub_resource_tilings) + { +- FIXME("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " ++ const struct d3d12_resource 
*resource_impl = impl_from_ID3D12Resource(resource); ++ struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ ++ TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " + "standard_title_shape %p, sub_resource_tiling_count %p, " +- "first_sub_resource_tiling %u, sub_resource_tilings %p stub!\n", ++ "first_sub_resource_tiling %u, sub_resource_tilings %p.\n", + iface, resource, total_tile_count, packed_mip_info, standard_tile_shape, + sub_resource_tiling_count, first_sub_resource_tiling, + sub_resource_tilings); ++ ++ d3d12_resource_get_tiling(device, resource_impl, total_tile_count, packed_mip_info, standard_tile_shape, ++ sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); + } + + static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, LUID *luid) +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index ea7b6859cc1..cd3856c2937 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -779,6 +779,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, + VkImageFormatListCreateInfoKHR format_list; + const struct vkd3d_format *format; + VkImageCreateInfo image_info; ++ uint32_t count; + VkResult vr; + + if (resource) +@@ -914,6 +915,20 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, + if (resource && image_info.tiling == VK_IMAGE_TILING_LINEAR) + resource->flags |= VKD3D_RESOURCE_LINEAR_TILING; + ++ if (sparse_resource) ++ { ++ count = 0; ++ VK_CALL(vkGetPhysicalDeviceSparseImageFormatProperties(device->vk_physical_device, image_info.format, ++ image_info.imageType, image_info.samples, image_info.usage, image_info.tiling, &count, NULL)); ++ ++ if (!count) ++ { ++ FIXME("Sparse images are not supported with format %u, type %u, samples %u, usage %#x.\n", ++ image_info.format, image_info.imageType, image_info.samples, image_info.usage); ++ return E_INVALIDARG; ++ } ++ } ++ + if ((vr = 
VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, vk_image))) < 0) + WARN("Failed to create Vulkan image, vr %d.\n", vr); + +@@ -928,6 +943,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + D3D12_RESOURCE_DESC validated_desc; + VkMemoryRequirements requirements; + VkImage vk_image; ++ bool tiled; + HRESULT hr; + + assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); +@@ -940,8 +956,10 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + desc = &validated_desc; + } + ++ tiled = desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; ++ + /* XXX: We have to create an image to get its memory requirements. */ +- if (SUCCEEDED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image))) ++ if (SUCCEEDED(hr = vkd3d_create_image(device, tiled ? NULL : &heap_properties, 0, desc, NULL, &vk_image))) + { + VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements)); + VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL)); +@@ -953,6 +971,11 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + return hr; + } + ++static void d3d12_resource_tile_info_cleanup(struct d3d12_resource *resource) ++{ ++ vkd3d_free(resource->tiles.subresources); ++} ++ + static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12_device *device) + { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; +@@ -968,6 +991,8 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 + else + VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL)); + ++ d3d12_resource_tile_info_cleanup(resource); ++ + if (resource->heap) + d3d12_heap_resource_destroyed(resource->heap); + } +@@ -1039,12 +1064,196 @@ static void d3d12_resource_get_level_box(const struct d3d12_resource *resource, + box->back = d3d12_resource_desc_get_depth(&resource->desc, level); + } + +-/* ID3D12Resource */ +-static inline struct 
d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) ++static void compute_image_subresource_size_in_tiles(const VkExtent3D *tile_extent, ++ const struct D3D12_RESOURCE_DESC *desc, unsigned int miplevel_idx, ++ struct vkd3d_tiled_region_extent *size) + { +- return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); ++ unsigned int width, height, depth; ++ ++ width = d3d12_resource_desc_get_width(desc, miplevel_idx); ++ height = d3d12_resource_desc_get_height(desc, miplevel_idx); ++ depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); ++ size->width = (width + tile_extent->width - 1) / tile_extent->width; ++ size->height = (height + tile_extent->height - 1) / tile_extent->height; ++ size->depth = (depth + tile_extent->depth - 1) / tile_extent->depth; + } + ++void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, ++ UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, ++ UINT *subresource_tiling_count, UINT first_subresource_tiling, ++ D3D12_SUBRESOURCE_TILING *subresource_tilings) ++{ ++ unsigned int i, subresource, subresource_count, miplevel_idx, count; ++ const struct vkd3d_subresource_tile_info *tile_info; ++ const VkExtent3D *tile_extent; ++ ++ tile_extent = &resource->tiles.tile_extent; ++ ++ if (packed_mip_info) ++ { ++ packed_mip_info->NumStandardMips = resource->tiles.standard_mip_count; ++ packed_mip_info->NumPackedMips = resource->desc.MipLevels - packed_mip_info->NumStandardMips; ++ packed_mip_info->NumTilesForPackedMips = !!resource->tiles.packed_mip_tile_count; /* non-zero dummy value */ ++ packed_mip_info->StartTileIndexInOverallResource = packed_mip_info->NumPackedMips ++ ? resource->tiles.subresources[resource->tiles.standard_mip_count].offset : 0; ++ } ++ ++ if (standard_tile_shape) ++ { ++ /* D3D12 docs say tile shape is cleared to zero if there is no standard mip, but drivers don't to do this. 
*/ ++ standard_tile_shape->WidthInTexels = tile_extent->width; ++ standard_tile_shape->HeightInTexels = tile_extent->height; ++ standard_tile_shape->DepthInTexels = tile_extent->depth; ++ } ++ ++ if (total_tile_count) ++ *total_tile_count = resource->tiles.total_count; ++ ++ if (!subresource_tiling_count) ++ return; ++ ++ subresource_count = resource->tiles.subresource_count; ++ ++ count = subresource_count - min(first_subresource_tiling, subresource_count); ++ count = min(count, *subresource_tiling_count); ++ ++ for (i = 0; i < count; ++i) ++ { ++ subresource = i + first_subresource_tiling; ++ miplevel_idx = subresource % resource->desc.MipLevels; ++ if (miplevel_idx >= resource->tiles.standard_mip_count) ++ { ++ memset(&subresource_tilings[i], 0, sizeof(subresource_tilings[i])); ++ subresource_tilings[i].StartTileIndexInOverallResource = D3D12_PACKED_TILE; ++ continue; ++ } ++ ++ tile_info = &resource->tiles.subresources[subresource]; ++ subresource_tilings[i].StartTileIndexInOverallResource = tile_info->offset; ++ subresource_tilings[i].WidthInTiles = tile_info->extent.width; ++ subresource_tilings[i].HeightInTiles = tile_info->extent.height; ++ subresource_tilings[i].DepthInTiles = tile_info->extent.depth; ++ } ++ *subresource_tiling_count = i; ++} ++ ++static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3d12_device *device) ++{ ++ unsigned int i, start_idx, subresource_count, tile_count, miplevel_idx; ++ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; ++ VkSparseImageMemoryRequirements *sparse_requirements_array; ++ VkSparseImageMemoryRequirements sparse_requirements = {0}; ++ struct vkd3d_subresource_tile_info *tile_info; ++ VkMemoryRequirements requirements; ++ const VkExtent3D *tile_extent; ++ uint32_t requirement_count; ++ ++ subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); ++ ++ if (!(resource->tiles.subresources = vkd3d_calloc(subresource_count, 
sizeof(*resource->tiles.subresources)))) ++ { ++ ERR("Failed to allocate subresource info array.\n"); ++ return false; ++ } ++ ++ if (d3d12_resource_is_buffer(resource)) ++ { ++ assert(subresource_count == 1); ++ ++ VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, resource->u.vk_buffer, &requirements)); ++ if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) ++ FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); ++ ++ tile_info = &resource->tiles.subresources[0]; ++ tile_info->offset = 0; ++ tile_info->extent.width = align(resource->desc.Width, D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) ++ / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; ++ tile_info->extent.height = 1; ++ tile_info->extent.depth = 1; ++ tile_info->count = tile_info->extent.width; ++ ++ resource->tiles.tile_extent.width = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; ++ resource->tiles.tile_extent.height = 1; ++ resource->tiles.tile_extent.depth = 1; ++ resource->tiles.total_count = tile_info->extent.width; ++ resource->tiles.subresource_count = 1; ++ resource->tiles.standard_mip_count = 1; ++ resource->tiles.packed_mip_tile_count = 0; ++ } ++ else ++ { ++ VK_CALL(vkGetImageMemoryRequirements(device->vk_device, resource->u.vk_image, &requirements)); ++ if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) ++ FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); ++ ++ requirement_count = 0; ++ VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, &requirement_count, NULL)); ++ if (!(sparse_requirements_array = vkd3d_calloc(requirement_count, sizeof(*sparse_requirements_array)))) ++ { ++ ERR("Failed to allocate sparse requirements array.\n"); ++ return false; ++ } ++ VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, ++ &requirement_count, sparse_requirements_array)); ++ ++ for (i = 0; i < requirement_count; ++i) ++ { ++ if 
(sparse_requirements_array[i].formatProperties.aspectMask & resource->format->vk_aspect_mask) ++ { ++ if (sparse_requirements.formatProperties.aspectMask) ++ { ++ WARN("Ignoring properties for aspect mask %#x.\n", ++ sparse_requirements_array[i].formatProperties.aspectMask); ++ } ++ else ++ { ++ sparse_requirements = sparse_requirements_array[i]; ++ } ++ } ++ } ++ vkd3d_free(sparse_requirements_array); ++ if (!sparse_requirements.formatProperties.aspectMask) ++ { ++ WARN("Failed to get sparse requirements.\n"); ++ return false; ++ } ++ ++ resource->tiles.tile_extent = sparse_requirements.formatProperties.imageGranularity; ++ resource->tiles.subresource_count = subresource_count; ++ resource->tiles.standard_mip_count = sparse_requirements.imageMipTailSize ++ ? sparse_requirements.imageMipTailFirstLod : resource->desc.MipLevels; ++ resource->tiles.packed_mip_tile_count = (resource->tiles.standard_mip_count < resource->desc.MipLevels) ++ ? sparse_requirements.imageMipTailSize / requirements.alignment : 0; ++ ++ for (i = 0, start_idx = 0; i < subresource_count; ++i) ++ { ++ miplevel_idx = i % resource->desc.MipLevels; ++ ++ tile_extent = &sparse_requirements.formatProperties.imageGranularity; ++ tile_info = &resource->tiles.subresources[i]; ++ compute_image_subresource_size_in_tiles(tile_extent, &resource->desc, miplevel_idx, &tile_info->extent); ++ tile_info->offset = start_idx; ++ tile_info->count = 0; ++ ++ if (miplevel_idx < resource->tiles.standard_mip_count) ++ { ++ tile_count = tile_info->extent.width * tile_info->extent.height * tile_info->extent.depth; ++ start_idx += tile_count; ++ tile_info->count = tile_count; ++ } ++ else if (miplevel_idx == resource->tiles.standard_mip_count) ++ { ++ tile_info->count = 1; /* Non-zero dummy value */ ++ start_idx += 1; ++ } ++ } ++ resource->tiles.total_count = start_idx; ++ } ++ ++ return true; ++} ++ ++/* ID3D12Resource */ + static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource *iface, + REFIID 
riid, void **object) + { +@@ -1661,6 +1870,21 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d + return E_INVALIDARG; + } + ++ if (desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE) ++ { ++ if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D && !device->vk_info.sparse_residency_3d) ++ { ++ WARN("The device does not support tiled 3D images.\n"); ++ return E_INVALIDARG; ++ } ++ if (format->plane_count > 1) ++ { ++ WARN("Invalid format %#x. D3D12 does not support multiplanar formats for tiled resources.\n", ++ format->dxgi_format); ++ return E_INVALIDARG; ++ } ++ } ++ + if (!d3d12_resource_validate_texture_format(desc, format) + || !d3d12_resource_validate_texture_alignment(desc, format)) + return E_INVALIDARG; +@@ -1722,6 +1946,12 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 + + resource->desc = *desc; + ++ if (!heap_properties && !device->vk_info.sparse_binding) ++ { ++ WARN("The device does not support tiled images.\n"); ++ return E_INVALIDARG; ++ } ++ + if (heap_properties && !d3d12_resource_validate_heap_properties(resource, heap_properties, initial_state)) + return E_INVALIDARG; + +@@ -1787,6 +2017,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 + resource->heap = NULL; + resource->heap_offset = 0; + ++ memset(&resource->tiles, 0, sizeof(resource->tiles)); ++ + if (FAILED(hr = vkd3d_private_store_init(&resource->private_store))) + { + d3d12_resource_destroy(resource, device); +@@ -1972,6 +2204,12 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, + desc, initial_state, optimized_clear_value, &object))) + return hr; + ++ if (!d3d12_resource_init_tiles(object, device)) ++ { ++ d3d12_resource_Release(&object->ID3D12Resource_iface); ++ return E_OUTOFMEMORY; ++ } ++ + TRACE("Created reserved resource %p.\n", object); + + *resource = object; +@@ -2368,13 +2606,11 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct 
d3d12_descriptor_heap *descr + descriptor_writes_free_object_refs(&writes, device); + } + +-static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) ++static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst, struct d3d12_descriptor_heap *descriptor_heap) + { +- struct d3d12_descriptor_heap *descriptor_heap; + unsigned int i, head; + + i = dst->index; +- descriptor_heap = d3d12_desc_get_descriptor_heap(dst); + head = descriptor_heap->dirty_list_head; + + /* Only one thread can swap the value away from zero. */ +@@ -2388,14 +2624,20 @@ static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) + } + } + +-void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, +- struct d3d12_device *device) ++static inline void descriptor_heap_write_atomic(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_desc *dst, ++ const struct d3d12_desc *src, struct d3d12_device *device) + { + void *object = src->s.u.object; + + d3d12_desc_replace(dst, object, device); +- if (device->use_vk_heaps && object && !dst->next) +- d3d12_desc_mark_as_modified(dst); ++ if (descriptor_heap->use_vk_heaps && object && !dst->next) ++ d3d12_desc_mark_as_modified(dst, descriptor_heap); ++} ++ ++void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, ++ struct d3d12_device *device) ++{ ++ descriptor_heap_write_atomic(d3d12_desc_get_descriptor_heap(dst), dst, src, device); + } + + static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +@@ -2403,7 +2645,9 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic + d3d12_desc_replace(descriptor, NULL, device); + } + +-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, ++/* This is a major performance bottleneck for some games, so do not load the device ++ * pointer from dst_heap. In some cases device will not be used. 
*/ ++void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, + struct d3d12_device *device) + { + struct d3d12_desc tmp; +@@ -2411,7 +2655,7 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, + assert(dst != src); + + tmp.s.u.object = d3d12_desc_get_object_ref(src, device); +- d3d12_desc_write_atomic(dst, &tmp, device); ++ descriptor_heap_write_atomic(dst_heap, dst, &tmp, device); + } + + static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device, +@@ -3810,7 +4054,15 @@ static D3D12_GPU_DESCRIPTOR_HANDLE * STDMETHODCALLTYPE d3d12_descriptor_heap_Get + + TRACE("iface %p, descriptor %p.\n", iface, descriptor); + +- descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; ++ if (heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) ++ { ++ descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; ++ } ++ else ++ { ++ WARN("Heap %p is not shader-visible.\n", iface); ++ descriptor->ptr = 0; ++ } + + return descriptor; + } +@@ -3913,7 +4165,7 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri + descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; + memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); + +- if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ++ if (!descriptor_heap->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) + return S_OK; + +@@ -3944,6 +4196,7 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript + if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) + return hr; + ++ descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); + d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); + 
vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); + +diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c +index 5e46b467252..2d8138245d8 100644 +--- a/libs/vkd3d/libs/vkd3d/state.c ++++ b/libs/vkd3d/libs/vkd3d/state.c +@@ -20,6 +20,7 @@ + + #include "vkd3d_private.h" + #include "vkd3d_shaders.h" ++#include "vkd3d_shader_utils.h" + + /* ID3D12RootSignature */ + static inline struct d3d12_root_signature *impl_from_ID3D12RootSignature(ID3D12RootSignature *iface) +@@ -1978,14 +1979,14 @@ static HRESULT create_shader_stage(struct d3d12_device *device, + compile_info.next = shader_interface; + compile_info.source.code = code->pShaderBytecode; + compile_info.source.size = code->BytecodeLength; +- compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; + compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; + compile_info.options = options; + compile_info.option_count = ARRAY_SIZE(options); + compile_info.log_level = VKD3D_SHADER_LOG_NONE; + compile_info.source_name = NULL; + +- if ((ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) ++ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0 ++ || (ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) + { + WARN("Failed to compile shader, vkd3d result %d.\n", ret); + return hresult_from_vkd3d_result(ret); +@@ -2008,6 +2009,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER + struct vkd3d_shader_scan_descriptor_info *descriptor_info) + { + struct vkd3d_shader_compile_info compile_info; ++ enum vkd3d_result ret; + + const struct vkd3d_shader_compile_option options[] = + { +@@ -2019,13 +2021,15 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER + compile_info.next = descriptor_info; + compile_info.source.code = code->pShaderBytecode; + compile_info.source.size = code->BytecodeLength; +- compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; + 
compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; + compile_info.options = options; + compile_info.option_count = ARRAY_SIZE(options); + compile_info.log_level = VKD3D_SHADER_LOG_NONE; + compile_info.source_name = NULL; + ++ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0) ++ return ret; ++ + return vkd3d_shader_scan(&compile_info, NULL); + } + +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index b0150754434..4bd6812b16e 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -147,9 +147,12 @@ struct vkd3d_vulkan_info + unsigned int max_vertex_attrib_divisor; + + VkPhysicalDeviceLimits device_limits; +- VkPhysicalDeviceSparseProperties sparse_properties; + struct vkd3d_device_descriptor_limits descriptor_limits; + ++ VkPhysicalDeviceSparseProperties sparse_properties; ++ bool sparse_binding; ++ bool sparse_residency_3d; ++ + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; + + unsigned int shader_extension_count; +@@ -250,6 +253,11 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) + { + } + ++static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) ++{ ++ return InterlockedIncrement((LONG volatile *)x); ++} ++ + static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) + { + return InterlockedDecrement((LONG volatile *)x); +@@ -384,6 +392,15 @@ static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) + } + # else + # error "vkd3d_atomic_decrement() not implemented for this platform" ++# endif /* HAVE_SYNC_SUB_AND_FETCH */ ++ ++# if HAVE_SYNC_ADD_AND_FETCH ++static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) ++{ ++ return __sync_add_and_fetch(x, 1); ++} ++# else ++# error "vkd3d_atomic_increment() not implemented for this platform" + # endif /* HAVE_SYNC_ADD_AND_FETCH */ + + # 
if HAVE_SYNC_BOOL_COMPARE_AND_SWAP +@@ -599,10 +616,12 @@ struct vkd3d_signaled_semaphore + /* ID3D12Fence */ + struct d3d12_fence + { +- ID3D12Fence ID3D12Fence_iface; ++ ID3D12Fence1 ID3D12Fence1_iface; + LONG internal_refcount; + LONG refcount; + ++ D3D12_FENCE_FLAGS flags; ++ + uint64_t value; + uint64_t max_pending_value; + struct vkd3d_mutex mutex; +@@ -670,6 +689,30 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); + #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 + #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010 + ++struct vkd3d_tiled_region_extent ++{ ++ unsigned int width; ++ unsigned int height; ++ unsigned int depth; ++}; ++ ++struct vkd3d_subresource_tile_info ++{ ++ unsigned int offset; ++ unsigned int count; ++ struct vkd3d_tiled_region_extent extent; ++}; ++ ++struct d3d12_resource_tile_info ++{ ++ VkExtent3D tile_extent; ++ unsigned int total_count; ++ unsigned int standard_mip_count; ++ unsigned int packed_mip_tile_count; ++ unsigned int subresource_count; ++ struct vkd3d_subresource_tile_info *subresources; ++}; ++ + /* ID3D12Resource */ + struct d3d12_resource + { +@@ -698,9 +741,16 @@ struct d3d12_resource + + struct d3d12_device *device; + ++ struct d3d12_resource_tile_info tiles; ++ + struct vkd3d_private_store private_store; + }; + ++static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) ++{ ++ return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); ++} ++ + static inline bool d3d12_resource_is_buffer(const struct d3d12_resource *resource) + { + return resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; +@@ -713,6 +763,10 @@ static inline bool d3d12_resource_is_texture(const struct d3d12_resource *resour + + bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); + HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d12_device *device); ++void d3d12_resource_get_tiling(struct d3d12_device *device, const struct 
d3d12_resource *resource, ++ UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, ++ UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, ++ D3D12_SUBRESOURCE_TILING *sub_resource_tilings); + + HRESULT d3d12_committed_resource_create(struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, +@@ -853,8 +907,9 @@ static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc * + { + do + { +- view = src->s.u.object; +- } while (view && !vkd3d_view_incref(view)); ++ if (!(view = src->s.u.object)) ++ return NULL; ++ } while (!vkd3d_view_incref(view)); + + /* Check if the object is still in src to handle the case where it was + * already freed and reused elsewhere when the refcount was incremented. */ +@@ -880,7 +935,10 @@ static inline void d3d12_desc_copy_raw(struct d3d12_desc *dst, const struct d3d1 + dst->s = src->s; + } + +-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); ++struct d3d12_descriptor_heap; ++ ++void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, ++ struct d3d12_device *device); + void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, + struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc); + void d3d12_desc_create_srv(struct d3d12_desc *descriptor, +@@ -983,6 +1041,7 @@ struct d3d12_descriptor_heap + D3D12_DESCRIPTOR_HEAP_DESC desc; + + struct d3d12_device *device; ++ bool use_vk_heaps; + + struct vkd3d_private_store private_store; + +@@ -1367,7 +1426,7 @@ enum vkd3d_pipeline_bind_point + /* ID3D12CommandList */ + struct d3d12_command_list + { +- ID3D12GraphicsCommandList2 ID3D12GraphicsCommandList2_iface; ++ ID3D12GraphicsCommandList3 ID3D12GraphicsCommandList3_iface; + LONG refcount; + + D3D12_COMMAND_LIST_TYPE type; +@@ -1454,6 +1513,8 @@ enum vkd3d_cs_op + VKD3D_CS_OP_WAIT, + 
VKD3D_CS_OP_SIGNAL, + VKD3D_CS_OP_EXECUTE, ++ VKD3D_CS_OP_UPDATE_MAPPINGS, ++ VKD3D_CS_OP_COPY_MAPPINGS, + }; + + struct vkd3d_cs_wait +@@ -1474,6 +1535,30 @@ struct vkd3d_cs_execute + unsigned int buffer_count; + }; + ++struct vkd3d_cs_update_mappings ++{ ++ struct d3d12_resource *resource; ++ struct d3d12_heap *heap; ++ D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates; ++ D3D12_TILE_REGION_SIZE *region_sizes; ++ D3D12_TILE_RANGE_FLAGS *range_flags; ++ UINT *heap_range_offsets; ++ UINT *range_tile_counts; ++ UINT region_count; ++ UINT range_count; ++ D3D12_TILE_MAPPING_FLAGS flags; ++}; ++ ++struct vkd3d_cs_copy_mappings ++{ ++ struct d3d12_resource *dst_resource; ++ struct d3d12_resource *src_resource; ++ D3D12_TILED_RESOURCE_COORDINATE dst_region_start_coordinate; ++ D3D12_TILED_RESOURCE_COORDINATE src_region_start_coordinate; ++ D3D12_TILE_REGION_SIZE region_size; ++ D3D12_TILE_MAPPING_FLAGS flags; ++}; ++ + struct vkd3d_cs_op_data + { + enum vkd3d_cs_op opcode; +@@ -1482,6 +1567,8 @@ struct vkd3d_cs_op_data + struct vkd3d_cs_wait wait; + struct vkd3d_cs_signal signal; + struct vkd3d_cs_execute execute; ++ struct vkd3d_cs_update_mappings update_mappings; ++ struct vkd3d_cs_copy_mappings copy_mappings; + } u; + }; + +@@ -1519,6 +1606,8 @@ struct d3d12_command_queue + * set, aux_op_queue.count must be zero. 
*/ + struct d3d12_command_queue_op_array aux_op_queue; + ++ bool supports_sparse_binding; ++ + struct vkd3d_private_store private_store; + }; + +@@ -1530,6 +1619,7 @@ struct d3d12_command_signature + { + ID3D12CommandSignature ID3D12CommandSignature_iface; + LONG refcount; ++ unsigned int internal_refcount; + + D3D12_COMMAND_SIGNATURE_DESC desc; + +-- +2.40.1 + diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-622311da8e87c60110329044e29b88dd524.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-622311da8e87c60110329044e29b88dd524.patch new file mode 100644 index 00000000..d636ba98 --- /dev/null +++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-622311da8e87c60110329044e29b88dd524.patch @@ -0,0 +1,1097 @@ +From 08931d59f470aef0def6f683a1dc25f8546c78e2 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Thu, 24 Aug 2023 08:49:26 +1000 +Subject: [PATCH] Updated vkd3d to 622311da8e87c60110329044e29b88dd524e28e7. + +--- + libs/vkd3d/include/vkd3d_shader.h | 2 + + libs/vkd3d/include/vkd3d_windows.h | 1 + + .../libs/vkd3d-shader/vkd3d_shader_main.c | 58 +++-- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 3 + + libs/vkd3d/libs/vkd3d/device.c | 225 ++++++++++-------- + libs/vkd3d/libs/vkd3d/resource.c | 59 +++-- + libs/vkd3d/libs/vkd3d/state.c | 4 +- + libs/vkd3d/libs/vkd3d/vkd3d_main.c | 4 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 22 +- + 9 files changed, 236 insertions(+), 142 deletions(-) + +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index e98aad4fe95..cfe54dbff53 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -1339,6 +1339,8 @@ enum vkd3d_shader_descriptor_info_flag + /** The descriptor is a UAV resource, on which the shader performs + * atomic ops. \since 1.6 */ + VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS = 0x00000008, ++ /** The descriptor is a raw (byte-addressed) buffer. 
\since 1.9 */ ++ VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER = 0x00000010, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_DESCRIPTOR_INFO_FLAG), + }; +diff --git a/libs/vkd3d/include/vkd3d_windows.h b/libs/vkd3d/include/vkd3d_windows.h +index 002ff667cbc..2daa74e92eb 100644 +--- a/libs/vkd3d/include/vkd3d_windows.h ++++ b/libs/vkd3d/include/vkd3d_windows.h +@@ -64,6 +64,7 @@ typedef int HRESULT; + + # define DXGI_ERROR_NOT_FOUND _HRESULT_TYPEDEF_(0x887a0002) + # define DXGI_ERROR_MORE_DATA _HRESULT_TYPEDEF_(0x887a0003) ++# define DXGI_ERROR_UNSUPPORTED _HRESULT_TYPEDEF_(0x887a0004) + + # define D3DERR_INVALIDCALL _HRESULT_TYPEDEF_(0x8876086c) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index c777bad2206..2bc8613f2ef 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -755,10 +755,10 @@ static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_cont + vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS); + } + +-static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, ++static struct vkd3d_shader_descriptor_info1 *vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, + enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register *reg, + const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, +- enum vkd3d_shader_resource_data_type resource_data_type, unsigned int flags) ++ enum vkd3d_shader_resource_data_type resource_data_type) + { + struct vkd3d_shader_scan_descriptor_info1 *info = context->scan_descriptor_info; + struct vkd3d_shader_descriptor_info1 *d; +@@ -767,56 +767,61 @@ static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *c + info->descriptor_count + 1, sizeof(*info->descriptors))) + { + ERR("Failed to allocate descriptor info.\n"); +- 
return false; ++ return NULL; + } + + d = &info->descriptors[info->descriptor_count]; ++ memset(d, 0, sizeof(*d)); + d->type = type; + d->register_id = reg->idx[0].offset; + d->register_space = range->space; + d->register_index = range->first; + d->resource_type = resource_type; + d->resource_data_type = resource_data_type; +- d->flags = flags; + d->count = (range->last == ~0u) ? ~0u : range->last - range->first + 1; + ++info->descriptor_count; + +- return true; ++ return d; + } + + static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_instruction *instruction) + { + const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; ++ struct vkd3d_shader_descriptor_info1 *d; + + if (!context->scan_descriptor_info) + return; + +- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->src.reg, &cb->range, +- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); ++ if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ++ &cb->src.reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) ++ return; ++ d->buffer_size = cb->size * 16; + } + + static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_instruction *instruction) + { + const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; +- unsigned int flags; ++ struct vkd3d_shader_descriptor_info1 *d; + + if (!context->scan_descriptor_info) + return; + ++ if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, ++ &sampler->src.reg, &sampler->range, VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT))) ++ return; ++ + if (instruction->flags & VKD3DSI_SAMPLER_COMPARISON_MODE) +- flags = VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; +- else +- flags = 0; +- vkd3d_shader_scan_add_descriptor(context, 
VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->src.reg, &sampler->range, +- VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT, flags); ++ d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; + } + + static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, +- enum vkd3d_shader_resource_data_type resource_data_type) ++ enum vkd3d_shader_resource_data_type resource_data_type, ++ unsigned int sample_count, unsigned int structure_stride, bool raw) + { ++ struct vkd3d_shader_descriptor_info1 *d; + enum vkd3d_shader_descriptor_type type; + + if (!context->scan_descriptor_info) +@@ -826,8 +831,13 @@ static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_cont + type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + else + type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; +- vkd3d_shader_scan_add_descriptor(context, type, &resource->reg.reg, &resource->range, +- resource_type, resource_data_type, 0); ++ if (!(d = vkd3d_shader_scan_add_descriptor(context, type, &resource->reg.reg, ++ &resource->range, resource_type, resource_data_type))) ++ return; ++ d->sample_count = sample_count; ++ d->structure_stride = structure_stride; ++ if (raw) ++ d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER; + } + + static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_scan_context *context, +@@ -886,7 +896,7 @@ static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_sca + } + + vkd3d_shader_scan_resource_declaration(context, &semantic->resource, +- semantic->resource_type, resource_data_type); ++ semantic->resource_type, resource_data_type, semantic->sample_count, 0, false); + } + + static void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, +@@ -920,12 +930,13 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte + case 
VKD3DSIH_DCL_RESOURCE_RAW: + case VKD3DSIH_DCL_UAV_RAW: + vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.raw_resource.resource, +- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); ++ VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, 0, true); + break; + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: + case VKD3DSIH_DCL_UAV_STRUCTURED: + vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.structured_resource.resource, +- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); ++ VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, ++ instruction->declaration.structured_resource.byte_stride, false); + break; + case VKD3DSIH_IF: + cf_info = vkd3d_shader_scan_push_cf_info(context); +@@ -1150,12 +1161,17 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + + for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) + { ++ unsigned int size = parser->shader_desc.flat_constant_count[i].external; + struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; + struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; ++ struct vkd3d_shader_descriptor_info1 *d; + + if (parser->shader_desc.flat_constant_count[i].external) +- vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ®, +- &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); ++ { ++ if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ®, ++ &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) ++ d->buffer_size = size * 16; ++ } + } + + if (!ret && signature_info) +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index c719085e11f..bf925a44690 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ 
-1122,6 +1122,9 @@ struct vkd3d_shader_descriptor_info1 + enum vkd3d_shader_resource_type resource_type; + enum vkd3d_shader_resource_data_type resource_data_type; + unsigned int flags; ++ unsigned int sample_count; ++ unsigned int buffer_size; ++ unsigned int structure_stride; + unsigned int count; + }; + +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index a2e1f13dec3..c33061073a3 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -2435,34 +2435,39 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device) + + static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) + { +- cache->head = NULL; ++ memset(cache, 0, sizeof(*cache)); + cache->size = size; + } + + static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) + { + union d3d12_desc_object u; ++ unsigned int i; + void *next; + +- for (u.object = cache->head; u.object; u.object = next) ++ for (i = 0; i < ARRAY_SIZE(cache->heads); ++i) + { +- next = u.header->next; +- vkd3d_free(u.object); ++ for (u.object = cache->heads[i].head; u.object; u.object = next) ++ { ++ next = u.header->next; ++ vkd3d_free(u.object); ++ } + } + } + + /* ID3D12Device */ +-static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) ++static inline struct d3d12_device *impl_from_ID3D12Device1(ID3D12Device1 *iface) + { +- return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device_iface); ++ return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device1_iface); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device1 *iface, + REFIID riid, void **object) + { + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + +- if (IsEqualGUID(riid, &IID_ID3D12Device) ++ if (IsEqualGUID(riid, &IID_ID3D12Device1) ++ || IsEqualGUID(riid, 
&IID_ID3D12Device) + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { +@@ -2477,9 +2482,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface + return E_NOINTERFACE; + } + +-static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) ++static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device1 *iface) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + ULONG refcount = InterlockedIncrement(&device->refcount); + + TRACE("%p increasing refcount to %u.\n", device, refcount); +@@ -2487,9 +2492,9 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) + return refcount; + } + +-static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) ++static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device1 *iface) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + ULONG refcount = InterlockedDecrement(&device->refcount); + + TRACE("%p decreasing refcount to %u.\n", device, refcount); +@@ -2523,10 +2528,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) + return refcount; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device1 *iface, + REFGUID guid, UINT *data_size, void *data) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -2534,10 +2539,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface + return vkd3d_get_private_data(&device->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE 
d3d12_device_SetPrivateData(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device1 *iface, + REFGUID guid, UINT data_size, const void *data) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -2545,19 +2550,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface + return vkd3d_set_private_data(&device->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device1 *iface, + REFGUID guid, const IUnknown *data) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&device->private_store, guid, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const WCHAR *name) ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device1 *iface, const WCHAR *name) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size)); + +@@ -2565,17 +2570,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const + VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); + } + +-static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device *iface) ++static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device1 *iface) + { + TRACE("iface %p.\n", iface); + + return 1; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device 
*iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device1 *iface, + const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_command_queue *object; + HRESULT hr; + +@@ -2589,10 +2594,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *i + riid, command_queue); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device1 *iface, + D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_command_allocator *object; + HRESULT hr; + +@@ -2606,10 +2611,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic + riid, command_allocator); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device1 *iface, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_pipeline_state *object; + HRESULT hr; + +@@ -2623,10 +2628,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 + &IID_ID3D12PipelineState, riid, pipeline_state); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device1 *iface, + const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) + { +- 
struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_pipeline_state *object; + HRESULT hr; + +@@ -2640,11 +2645,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D + &IID_ID3D12PipelineState, riid, pipeline_state); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device1 *iface, + UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, + ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_command_list *object; + HRESULT hr; + +@@ -2767,10 +2772,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) + return true; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device1 *iface, + D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", + iface, feature, feature_data, feature_data_size); +@@ -3269,10 +3274,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device * + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device1 *iface, + const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = 
impl_from_ID3D12Device1(iface); + struct d3d12_descriptor_heap *object; + HRESULT hr; + +@@ -3286,7 +3291,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device + &IID_ID3D12DescriptorHeap, riid, descriptor_heap); + } + +-static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device *iface, ++static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device1 *iface, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) + { + TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); +@@ -3309,11 +3314,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device1 *iface, + UINT node_mask, const void *bytecode, SIZE_T bytecode_length, + REFIID riid, void **root_signature) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_root_signature *object; + HRESULT hr; + +@@ -3329,10 +3334,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device * + &IID_ID3D12RootSignature, riid, root_signature); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device1 *iface, + const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); +@@ -3341,11 +3346,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device + 
d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device1 *iface, + ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", +@@ -3355,11 +3360,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device1 *iface, + ID3D12Resource *resource, ID3D12Resource *counter_resource, + const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n", +@@ -3370,7 +3375,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device1 *iface, + ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +@@ -3378,10 +3383,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device * + iface, resource, 
desc, descriptor.ptr); + + d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), +- impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); ++ impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device1 *iface, + ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +@@ -3389,13 +3394,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device * + iface, resource, desc, descriptor.ptr); + + d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), +- impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); ++ impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device1 *iface, + const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); +@@ -3404,14 +3409,14 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device1 *iface, + UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, + const UINT *dst_descriptor_range_sizes, + UINT src_descriptor_range_count, const 
D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, + const UINT *src_descriptor_range_sizes, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; + unsigned int dst_range_size, src_range_size; + struct d3d12_descriptor_heap *dst_heap; +@@ -3467,7 +3472,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, + } + } + +-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device1 *iface, + UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, + const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) +@@ -3482,10 +3487,10 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i + } + + static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( +- ID3D12Device *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, ++ ID3D12Device1 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + UINT count, const D3D12_RESOURCE_DESC *resource_descs) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + const D3D12_RESOURCE_DESC *desc; + uint64_t requested_alignment; + +@@ -3558,10 +3563,10 @@ invalid: + return info; + } + +-static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device *iface, ++static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device1 *iface, + D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ 
struct d3d12_device *device = impl_from_ID3D12Device1(iface); + bool coherent; + + TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", +@@ -3601,12 +3606,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope + return heap_properties; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device1 *iface, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_resource *object; + HRESULT hr; + +@@ -3625,10 +3630,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device1 *iface, + const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_heap *object; + HRESULT hr; + +@@ -3644,12 +3649,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, + return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device1 *iface, + ID3D12Heap *heap, UINT64 heap_offset, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const 
D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_heap *heap_object; + struct d3d12_resource *object; + HRESULT hr; +@@ -3668,11 +3673,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device1 *iface, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_resource *object; + HRESULT hr; + +@@ -3686,11 +3691,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device1 *iface, + ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, + const WCHAR *name, HANDLE *handle) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", + iface, object, attributes, access, debugstr_w(name, device->wchar_size), handle); +@@ -3698,7 +3703,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *i + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE 
d3d12_device_OpenSharedHandle(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device1 *iface, + HANDLE handle, REFIID riid, void **object) + { + FIXME("iface %p, handle %p, riid %s, object %p stub!\n", +@@ -3707,10 +3712,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *ifa + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device1 *iface, + const WCHAR *name, DWORD access, HANDLE *handle) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + FIXME("iface %p, name %s, access %#x, handle %p stub!\n", + iface, debugstr_w(name, device->wchar_size), access, handle); +@@ -3718,7 +3723,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device1 *iface, + UINT object_count, ID3D12Pageable * const *objects) + { + FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", +@@ -3727,7 +3732,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device1 *iface, + UINT object_count, ID3D12Pageable * const *objects) + { + FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", +@@ -3736,10 +3741,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device1 *iface, + UINT64 initial_value, 
D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_fence *object; + HRESULT hr; + +@@ -3752,21 +3757,21 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, + return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device *iface) ++static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device1 *iface) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p.\n", iface); + + return device->removed_reason; + } + +-static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device1 *iface, + const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, + UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, + UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; + unsigned int width, height, depth, plane_count, sub_resources_per_plane; +@@ -3846,10 +3851,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *i + *total_bytes = total; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device1 *iface, + const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = 
impl_from_ID3D12Device1(iface); + struct d3d12_query_heap *object; + HRESULT hr; + +@@ -3862,18 +3867,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *ifac + return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device *iface, BOOL enable) ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device1 *iface, BOOL enable) + { + FIXME("iface %p, enable %#x stub!\n", iface, enable); + + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device1 *iface, + const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, + REFIID iid, void **command_signature) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_command_signature *object; + HRESULT hr; + +@@ -3887,14 +3892,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic + &IID_ID3D12CommandSignature, iid, command_signature); + } + +-static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device1 *iface, + ID3D12Resource *resource, UINT *total_tile_count, + D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, + UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, + D3D12_SUBRESOURCE_TILING *sub_resource_tilings) + { + const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " + "standard_title_shape %p, 
sub_resource_tiling_count %p, " +@@ -3907,9 +3912,9 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device *iface + sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); + } + +-static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, LUID *luid) ++static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device1 *iface, LUID *luid) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, luid %p.\n", iface, luid); + +@@ -3918,7 +3923,33 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, + return luid; + } + +-static const struct ID3D12DeviceVtbl d3d12_device_vtbl = ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device1 *iface, ++ const void *blob, SIZE_T blob_size, REFIID iid, void **lib) ++{ ++ FIXME("iface %p, blob %p, blob_size %lu, iid %s, lib %p stub!\n", iface, blob, blob_size, debugstr_guid(iid), lib); ++ ++ return DXGI_ERROR_UNSUPPORTED; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device1 *iface, ++ ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, ++ D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) ++{ ++ FIXME("iface %p, fences %p, values %p, fence_count %u, flags %#x, event %p stub!\n", ++ iface, fences, values, fence_count, flags, event); ++ ++ return E_NOTIMPL; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device1 *iface, ++ UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) ++{ ++ FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); ++ ++ return S_OK; ++} ++ ++static const struct ID3D12Device1Vtbl d3d12_device_vtbl = + { + /* IUnknown methods */ + d3d12_device_QueryInterface, +@@ -3967,14 +3998,18 @@ 
static const struct ID3D12DeviceVtbl d3d12_device_vtbl = + d3d12_device_CreateCommandSignature, + d3d12_device_GetResourceTiling, + d3d12_device_GetAdapterLuid, ++ /* ID3D12Device1 methods */ ++ d3d12_device_CreatePipelineLibrary, ++ d3d12_device_SetEventOnMultipleFenceCompletion, ++ d3d12_device_SetResidencyPriority, + }; + +-struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface) ++struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface) + { + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_device_vtbl); +- return impl_from_ID3D12Device(iface); ++ return impl_from_ID3D12Device1(iface); + } + + static HRESULT d3d12_device_init(struct d3d12_device *device, +@@ -3983,7 +4018,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + const struct vkd3d_vk_device_procs *vk_procs; + HRESULT hr; + +- device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; ++ device->ID3D12Device1_iface.lpVtbl = &d3d12_device_vtbl; + device->refcount = 1; + + vkd3d_instance_incref(device->vkd3d_instance = instance); +@@ -4180,28 +4215,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha + + IUnknown *vkd3d_get_device_parent(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); + + return d3d12_device->parent; + } + + VkDevice vkd3d_get_vk_device(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); + + return d3d12_device->vk_device; + } + + VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); + + return d3d12_device->vk_physical_device; + } + + struct 
vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); + + return d3d12_device->vkd3d_instance; + } +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index cd3856c2937..f3842958d96 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -2220,7 +2220,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, + HRESULT vkd3d_create_image_resource(ID3D12Device *device, + const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) + { +- struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device(device); ++ struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device1((ID3D12Device1 *)device); + struct d3d12_resource *object; + HRESULT hr; + +@@ -2282,38 +2282,67 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) + return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); + } + +-/* Objects are cached so that vkd3d_view_incref() can safely check the refcount +- * of an object freed by another thread. */ ++#define HEAD_INDEX_MASK (ARRAY_SIZE(cache->heads) - 1) ++ ++/* Objects are cached so that vkd3d_view_incref() can safely check the refcount of an ++ * object freed by another thread. This could be implemented as a single atomic linked ++ * list, but it requires handling the ABA problem, which brings issues with cross-platform ++ * support, compiler support, and non-universal x86-64 support for 128-bit CAS. 
*/ + static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) + { + union d3d12_desc_object u; +- void *next; ++ unsigned int i; + +- do ++ STATIC_ASSERT(!(ARRAY_SIZE(cache->heads) & HEAD_INDEX_MASK)); ++ ++ i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; ++ for (;;) + { +- u.object = cache->head; +- if (!u.object) ++ if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) ++ { ++ if ((u.object = cache->heads[i].head)) ++ { ++ vkd3d_atomic_decrement(&cache->free_count); ++ cache->heads[i].head = u.header->next; ++ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); ++ return u.object; ++ } ++ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); ++ } ++ /* Keeping a free count avoids uncertainty over when this loop should terminate, ++ * which could result in excess allocations gradually increasing without limit. */ ++ if (cache->free_count < ARRAY_SIZE(cache->heads)) + return vkd3d_malloc(cache->size); +- next = u.header->next; +- } +- while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, u.object, next)); + +- return u.object; ++ i = (i + 1) & HEAD_INDEX_MASK; ++ } + } + + static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object) + { + union d3d12_desc_object u = {object}; ++ unsigned int i; + void *head; + +- do ++ /* Using the same index as above may result in a somewhat uneven distribution, ++ * but the main objective is to avoid costly spinlock contention. 
*/ ++ i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; ++ for (;;) + { +- head = cache->head; +- u.header->next = head; ++ if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) ++ break; ++ i = (i + 1) & HEAD_INDEX_MASK; + } +- while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, u.object)); ++ ++ head = cache->heads[i].head; ++ u.header->next = head; ++ cache->heads[i].head = u.object; ++ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); ++ vkd3d_atomic_increment(&cache->free_count); + } + ++#undef HEAD_INDEX_MASK ++ + static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) + { + struct vkd3d_cbuffer_desc *desc; +diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c +index 2d8138245d8..7ae46c862cc 100644 +--- a/libs/vkd3d/libs/vkd3d/state.c ++++ b/libs/vkd3d/libs/vkd3d/state.c +@@ -375,8 +375,8 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig + + if (unbounded && range->OffsetInDescriptorsFromTableStart == D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + { +- WARN("An unbounded range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " +- "another unbounded range.\n"); ++ WARN("A range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " ++ "an unbounded range.\n"); + return E_INVALIDARG; + } + +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +index 88301fbb313..159560afd8e 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +@@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, + + if (!device) + { +- ID3D12Device_Release(&object->ID3D12Device_iface); ++ ID3D12Device_Release(&object->ID3D12Device1_iface); + return S_FALSE; + } + +- return return_interface(&object->ID3D12Device_iface, &IID_ID3D12Device, iid, device); ++ return return_interface(&object->ID3D12Device1_iface, &IID_ID3D12Device, iid, 
device); + } + + /* ID3D12RootSignatureDeserializer */ +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index 4bd6812b16e..a18287b4cd4 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -1690,9 +1690,17 @@ struct vkd3d_uav_clear_state + HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); + void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); + ++struct desc_object_cache_head ++{ ++ void *head; ++ unsigned int spinlock; ++}; ++ + struct vkd3d_desc_object_cache + { +- void * volatile head; ++ struct desc_object_cache_head heads[16]; ++ unsigned int next_index; ++ unsigned int free_count; + size_t size; + }; + +@@ -1701,7 +1709,7 @@ struct vkd3d_desc_object_cache + /* ID3D12Device */ + struct d3d12_device + { +- ID3D12Device ID3D12Device_iface; ++ ID3D12Device1 ID3D12Device1_iface; + LONG refcount; + + VkDevice vk_device; +@@ -1767,27 +1775,27 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 + bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); + void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, + const char *message, ...) 
VKD3D_PRINTF_FUNC(3, 4); +-struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface); ++struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface); + + static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) + { +- return ID3D12Device_QueryInterface(&device->ID3D12Device_iface, iid, object); ++ return ID3D12Device1_QueryInterface(&device->ID3D12Device1_iface, iid, object); + } + + static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) + { +- return ID3D12Device_AddRef(&device->ID3D12Device_iface); ++ return ID3D12Device1_AddRef(&device->ID3D12Device1_iface); + } + + static inline ULONG d3d12_device_release(struct d3d12_device *device) + { +- return ID3D12Device_Release(&device->ID3D12Device_iface); ++ return ID3D12Device1_Release(&device->ID3D12Device1_iface); + } + + static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) + { +- return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); ++ return ID3D12Device1_GetDescriptorHandleIncrementSize(&device->ID3D12Device1_iface, descriptor_type); + } + + /* utils */ +-- +2.40.1 + diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-b4bb3931c5e7e59496c07c2d4e3b6cfd3bf.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-b4bb3931c5e7e59496c07c2d4e3b6cfd3bf.patch deleted file mode 100644 index 47996b9d..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-b4bb3931c5e7e59496c07c2d4e3b6cfd3bf.patch +++ /dev/null @@ -1,5119 +0,0 @@ -From 7a3d9f859ea9571d19d388781f91855a4e06f122 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 3 Aug 2023 08:38:38 +1000 -Subject: [PATCH 3/3] Updated vkd3d to - b4bb3931c5e7e59496c07c2d4e3b6cfd3bf91cf1. 
- ---- - include/d3d12.idl | 66 +++ - libs/vkd3d/include/private/vkd3d_common.h | 15 + - libs/vkd3d/include/vkd3d_shader.h | 59 ++- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 104 +++- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 59 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 32 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 4 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 494 ++++++++++++------ - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 152 +++--- - libs/vkd3d/libs/vkd3d-shader/ir.c | 159 +++++- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 140 +++-- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 164 ++++-- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 83 ++- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 9 + - .../libs/vkd3d-shader/vkd3d_shader_private.h | 15 +- - libs/vkd3d/libs/vkd3d/command.c | 451 +++++++++------- - libs/vkd3d/libs/vkd3d/device.c | 20 +- - libs/vkd3d/libs/vkd3d/resource.c | 238 ++++++++- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 55 +- - 19 files changed, 1756 insertions(+), 563 deletions(-) - -diff --git a/include/d3d12.idl b/include/d3d12.idl -index c6064939e1f..5811608b94f 100644 ---- a/include/d3d12.idl -+++ b/include/d3d12.idl -@@ -44,6 +44,7 @@ const UINT D3D12_DEFAULT_STENCIL_WRITE_MASK = 0xff; - const UINT D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND = 0xffffffff; - cpp_quote("#define D3D12_FLOAT32_MAX (3.402823466e+38f)") - const UINT D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT = 32; -+const UINT D3D12_PACKED_TILE = 0xffffffff; - const UINT D3D12_UAV_SLOT_COUNT = 64; - const UINT D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; - const UINT D3D12_REQ_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT = 4096; -@@ -72,6 +73,7 @@ const UINT D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT = 4096; - const UINT D3D12_STANDARD_MAXIMUM_ELEMENT_ALIGNMENT_BYTE_MULTIPLE = 4; - const UINT D3D12_TEXTURE_DATA_PITCH_ALIGNMENT = 256; - const UINT D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT = 512; -+const UINT D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES = 65536; - const UINT D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT = 
4096; - const UINT D3D12_VS_INPUT_REGISTER_COUNT = 32; - const UINT D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE = 16; -@@ -1853,6 +1855,24 @@ typedef struct D3D12_WRITEBUFFERIMMEDIATE_PARAMETER - UINT32 Value; - } D3D12_WRITEBUFFERIMMEDIATE_PARAMETER; - -+typedef enum D3D12_PROTECTED_RESOURCE_SESSION_FLAGS -+{ -+ D3D12_PROTECTED_RESOURCE_SESSION_FLAG_NONE = 0, -+} D3D12_PROTECTED_RESOURCE_SESSION_FLAGS; -+cpp_quote("DEFINE_ENUM_FLAG_OPERATORS(D3D12_PROTECTED_RESOURCE_SESSION_FLAGS);") -+ -+typedef enum D3D12_PROTECTED_SESSION_STATUS -+{ -+ D3D12_PROTECTED_SESSION_STATUS_OK = 0, -+ D3D12_PROTECTED_SESSION_STATUS_INVALID = 1, -+} D3D12_PROTECTED_SESSION_STATUS; -+ -+typedef struct D3D12_PROTECTED_RESOURCE_SESSION_DESC -+{ -+ UINT NodeMask; -+ D3D12_PROTECTED_RESOURCE_SESSION_FLAGS Flags; -+} D3D12_PROTECTED_RESOURCE_SESSION_DESC; -+ - [ - uuid(c4fec28f-7966-4e95-9f94-f431cb56c3b8), - object, -@@ -2214,6 +2234,41 @@ interface ID3D12GraphicsCommandList2 : ID3D12GraphicsCommandList1 - const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes); - } - -+[ -+ uuid(a1533d18-0ac1-4084-85b9-89a96116806b), -+ object, -+ local, -+ pointer_default(unique) -+] -+interface ID3D12ProtectedSession : ID3D12DeviceChild -+{ -+ HRESULT GetStatusFence(REFIID riid, void **fence); -+ -+ D3D12_PROTECTED_SESSION_STATUS GetSessionStatus(); -+} -+ -+[ -+ uuid(6cd696f4-f289-40cc-8091-5a6c0a099c3d), -+ object, -+ local, -+ pointer_default(unique) -+] -+interface ID3D12ProtectedResourceSession : ID3D12ProtectedSession -+{ -+ D3D12_PROTECTED_RESOURCE_SESSION_DESC GetDesc(); -+} -+ -+[ -+ uuid(6fda83a7-b84c-4e38-9ac8-c7bd22016b3d), -+ object, -+ local, -+ pointer_default(unique) -+] -+interface ID3D12GraphicsCommandList3 : ID3D12GraphicsCommandList2 -+{ -+ void SetProtectedResourceSession(ID3D12ProtectedResourceSession *protected_resource_session); -+} -+ - typedef enum D3D12_TILE_RANGE_FLAGS - { - D3D12_TILE_RANGE_FLAG_NONE = 0x0, -@@ -2378,6 +2433,17 @@ interface ID3D12Fence : ID3D12Pageable - 
HRESULT Signal(UINT64 value); - } - -+[ -+ uuid(433685fe-e22b-4ca0-a8db-b5b4f4dd0e4a), -+ object, -+ local, -+ pointer_default(unique) -+] -+interface ID3D12Fence1 : ID3D12Fence -+{ -+ D3D12_FENCE_FLAGS GetCreationFlags(); -+} -+ - [ - uuid(6102dee4-af59-4b09-b999-b44d73f09b24), - object, -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 0263fc47297..f7d98f327f1 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -193,6 +193,21 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) - return (x > y) - (x < y); - } - -+static inline bool bitmap_clear(uint32_t *map, unsigned int idx) -+{ -+ return map[idx >> 5] &= ~(1u << (idx & 0x1f)); -+} -+ -+static inline bool bitmap_set(uint32_t *map, unsigned int idx) -+{ -+ return map[idx >> 5] |= (1u << (idx & 0x1f)); -+} -+ -+static inline bool bitmap_is_set(const uint32_t *map, unsigned int idx) -+{ -+ return map[idx >> 5] & (1u << (idx & 0x1f)); -+} -+ - static inline int ascii_isupper(int c) - { - return 'A' <= c && c <= 'Z'; -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 6c17a07b9d2..d6653d18e56 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -139,6 +139,14 @@ enum vkd3d_shader_compile_option_formatting_flags - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), - }; - -+enum vkd3d_shader_compile_option_pack_matrix_order -+{ -+ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR = 0x00000001, -+ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR = 0x00000002, -+ -+ VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER), -+}; -+ - enum vkd3d_shader_compile_option_name - { - /** -@@ -169,6 +177,15 @@ enum vkd3d_shader_compile_option_name - * \since 1.7 - */ - VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE = 0x00000006, -+ /** -+ * This option specifies default matrix packing 
order. It's only supported for HLSL source type. -+ * Explicit variable modifiers or pragmas will take precedence. -+ * -+ * \a value is a member of enum vkd3d_shader_compile_option_pack_matrix_order. -+ * -+ * \since 1.9 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER = 0x00000007, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), - }; -@@ -332,6 +349,25 @@ struct vkd3d_shader_parameter - } u; - }; - -+/** -+ * Symbolic register indices for mapping uniform constant register sets in -+ * legacy Direct3D bytecode to constant buffer views in the target environment. -+ * -+ * Members of this enumeration are used in -+ * \ref vkd3d_shader_resource_binding.register_index. -+ * -+ * \since 1.9 -+ */ -+enum vkd3d_shader_d3dbc_constant_register -+{ -+ /** The float constant register set, c# in Direct3D assembly. */ -+ VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER = 0x0, -+ /** The integer constant register set, i# in Direct3D assembly. */ -+ VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER = 0x1, -+ /** The boolean constant register set, b# in Direct3D assembly. */ -+ VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER = 0x2, -+}; -+ - /** - * Describes the mapping of a single resource or resource array to its binding - * point in the target environment. -@@ -356,7 +392,14 @@ struct vkd3d_shader_resource_binding - * support multiple register spaces, this parameter must be set to 0. - */ - unsigned int register_space; -- /** Register index of the DXBC resource. */ -+ /** -+ * Register index of the Direct3D resource. -+ * -+ * For legacy Direct3D shaders, vkd3d-shader maps each constant register -+ * set to a single constant buffer view. This parameter names the register -+ * set to map, and must be a member of -+ * enum vkd3d_shader_d3dbc_constant_register. -+ */ - unsigned int register_index; - /** Shader stage(s) to which the resource is visible. 
*/ - enum vkd3d_shader_visibility shader_visibility; -@@ -1330,6 +1373,20 @@ struct vkd3d_shader_descriptor_info - * A chained structure enumerating the descriptors declared by a shader. - * - * This structure extends vkd3d_shader_compile_info. -+ * -+ * When scanning a legacy Direct3D shader, vkd3d-shader enumerates each -+ * constant register set used by the shader as a single constant buffer -+ * descriptor, as follows: -+ * - The \ref vkd3d_shader_descriptor_info.type field is set to -+ * VKD3D_SHADER_DESCRIPTOR_TYPE_CBV. -+ * - The \ref vkd3d_shader_descriptor_info.register_space field is set to zero. -+ * - The \ref vkd3d_shader_descriptor_info.register_index field is set to a -+ * member of enum vkd3d_shader_d3dbc_constant_register denoting which set -+ * is used. -+ * - The \ref vkd3d_shader_descriptor_info.count field is set to one. -+ * -+ * In summary, there may be up to three such descriptors, one for each register -+ * set used by the shader: float, integer, and boolean. - */ - struct vkd3d_shader_scan_descriptor_info - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 369112ce18d..fe739339bd1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -214,6 +214,9 @@ struct vkd3d_shader_sm1_parser - bool abort; - - struct vkd3d_shader_parser p; -+ -+#define MAX_CONSTANT_COUNT 8192 -+ uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32]; - }; - - /* This table is not order or position dependent. 
*/ -@@ -729,12 +732,60 @@ static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser * - semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); - } - --static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, const struct vkd3d_shader_register *reg, unsigned int mask) -+static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, -+ enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) - { -+ struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; -+ -+ desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); -+ if (from_def) -+ { -+ /* d3d shaders have a maximum of 8192 constants; we should not overrun -+ * this array. */ -+ assert((index / 32) <= ARRAY_SIZE(sm1->constant_def_mask[set])); -+ bitmap_set(sm1->constant_def_mask[set], index); -+ } -+} -+ -+static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, -+ const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) -+{ -+ struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; - uint32_t register_index = reg->idx[0].offset; - -- if (reg->type == VKD3DSPR_TEMP) -- sm1->p.shader_desc.temp_count = max(sm1->p.shader_desc.temp_count, register_index + 1); -+ switch (reg->type) -+ { -+ case VKD3DSPR_TEMP: -+ desc->temp_count = max(desc->temp_count, register_index + 1); -+ break; -+ -+ case VKD3DSPR_CONST: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONST2: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONST3: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONST4: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def); -+ break; -+ -+ case 
VKD3DSPR_CONSTINT: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONSTBOOL: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, register_index, from_def); -+ break; -+ -+ default: -+ break; -+ } - - add_signature_element_from_register(sm1, reg, false, mask); - } -@@ -1076,16 +1127,19 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_FLOAT); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } - else if (ins->handler_idx == VKD3DSIH_DEFB) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_SCALAR, VKD3D_DATA_UINT); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } - else if (ins->handler_idx == VKD3DSIH_DEFI) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_INT); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } - else - { -@@ -1093,7 +1147,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - if (ins->dst_count) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); -- shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, false); - } - - /* Predication token */ -@@ -1104,7 +1158,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - for (i = 0; i < ins->src_count; ++i) - { - shader_sm1_read_src_param(sm1, &p, &src_params[i]); -- shader_sm1_scan_register(sm1, &src_params[i].reg, mask_from_swizzle(src_params[i].swizzle)); -+ shader_sm1_scan_register(sm1, &src_params[i].reg, 
mask_from_swizzle(src_params[i].swizzle), false); - } - } - -@@ -1212,12 +1266,30 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, - return VKD3D_OK; - } - -+static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, -+ enum vkd3d_shader_d3dbc_constant_register set) -+{ -+ unsigned int j; -+ -+ /* Find the highest constant index which is not written by a DEF -+ * instruction. We can't (easily) use an FFZ function for this since it -+ * needs to be limited by the highest used register index. */ -+ for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) -+ { -+ if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) -+ return j; -+ } -+ -+ return 0; -+} -+ - int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) - { - struct vkd3d_shader_instruction_array *instructions; - struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_sm1_parser *sm1; -+ unsigned int i; - int ret; - - if (!(sm1 = vkd3d_calloc(1, sizeof(*sm1)))) -@@ -1257,6 +1329,9 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi - - *parser = &sm1->p; - -+ for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) -+ sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); -+ - return sm1->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; - } - -@@ -1959,7 +2034,12 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - { - sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; -- assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); -+ if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC) -+ { -+ /* These can appear in sm4-style combined sample instructions. 
*/ -+ hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); -+ continue; -+ } - - reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; - write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); -@@ -2362,7 +2442,6 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) - { - struct vkd3d_bytecode_buffer buffer = {0}; -- int ret; - - put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - -@@ -2375,10 +2454,17 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun - - put_u32(&buffer, D3DSIO_END); - -- if (!(ret = buffer.status)) -+ if (buffer.status) -+ ctx->result = buffer.status; -+ -+ if (!ctx->result) - { - out->code = buffer.data; - out->size = buffer.size; - } -- return ret; -+ else -+ { -+ vkd3d_free(buffer.data); -+ } -+ return ctx->result; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 53a4c2da4ba..f9efe47f95d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -208,6 +208,7 @@ struct sm6_value - { - const struct sm6_type *type; - enum sm6_value_type value_type; -+ bool is_undefined; - union - { - struct sm6_function_data function; -@@ -1726,8 +1727,16 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - WARN("Unhandled constant array.\n"); - break; - -+ case CST_CODE_UNDEF: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ dst->u.reg.type = VKD3DSPR_UNDEF; -+ /* Mark as explicitly undefined, not the result of a missing constant code or instruction. 
*/ -+ dst->is_undefined = true; -+ break; -+ - default: - FIXME("Unhandled constant code %u.\n", record->code); -+ dst->u.reg.type = VKD3DSPR_UNDEF; - break; - } - -@@ -1737,6 +1746,27 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const - return VKD3D_OK; - } - -+static struct vkd3d_shader_instruction *sm6_parser_require_space(struct sm6_parser *sm6, size_t extra) -+{ -+ if (!shader_instruction_array_reserve(&sm6->p.instructions, sm6->p.instructions.count + extra)) -+ { -+ ERR("Failed to allocate instruction.\n"); -+ return NULL; -+ } -+ return &sm6->p.instructions.elements[sm6->p.instructions.count]; -+} -+ -+/* Space should be reserved before calling this. It is intended to require no checking of the returned pointer. */ -+static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_parser *sm6, -+ enum vkd3d_shader_opcode handler_idx) -+{ -+ struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); -+ assert(ins); -+ shader_instruction_init(ins, handler_idx); -+ ++sm6->p.instructions.count; -+ return ins; -+} -+ - static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) - { - const struct dxil_block *block = &sm6->root_block; -@@ -1767,7 +1797,8 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) - break; - - case MODULE_CODE_VERSION: -- dxil_record_validate_operand_count(record, 1, 1, sm6); -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; - if ((version = record->operands[0]) != 1) - { - FIXME("Unsupported format version %#"PRIx64".\n", version); -@@ -1931,6 +1962,21 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - return VKD3D_OK; - } - -+static bool sm6_block_emit_instructions(struct sm6_block *block, struct sm6_parser *sm6) -+{ -+ struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, block->instruction_count + 1); -+ -+ if (!ins) -+ return false; -+ -+ 
memcpy(ins, block->instructions, block->instruction_count * sizeof(*block->instructions)); -+ sm6->p.instructions.count += block->instruction_count; -+ -+ sm6_parser_add_instruction(sm6, VKD3DSIH_RET); -+ -+ return true; -+} -+ - static enum vkd3d_result sm6_parser_module_init(struct sm6_parser *sm6, const struct dxil_block *block, - unsigned int level) - { -@@ -2065,6 +2111,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - struct vkd3d_shader_version version; - struct dxil_block *block; - enum vkd3d_result ret; -+ unsigned int i; - - count = byte_code_size / sizeof(*byte_code); - if (count < 6) -@@ -2254,6 +2301,16 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t - return ret; - } - -+ for (i = 0; i < sm6->function_count; ++i) -+ { -+ if (!sm6_block_emit_instructions(sm6->functions[i].blocks[0], sm6)) -+ { -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory emitting shader instructions."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ } -+ - dxil_block_destroy(&sm6->root_block); - - return VKD3D_OK; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index ab508502623..4ed7712b0aa 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -2544,6 +2544,8 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru - [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", - [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", - [HLSL_RESOURCE_GATHER_ALPHA] = "gather_alpha", -+ [HLSL_RESOURCE_SAMPLE_INFO] = "sample_info", -+ [HLSL_RESOURCE_RESINFO] = "resinfo", - }; - - assert(load->load_type < ARRAY_SIZE(type_names)); -@@ -2551,8 +2553,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru - dump_deref(buffer, &load->resource); - vkd3d_string_buffer_printf(buffer, ", sampler = "); - dump_deref(buffer, &load->sampler); -- 
vkd3d_string_buffer_printf(buffer, ", coords = "); -- dump_src(buffer, &load->coords); -+ if (load->coords.node) -+ { -+ vkd3d_string_buffer_printf(buffer, ", coords = "); -+ dump_src(buffer, &load->coords); -+ } - if (load->sample_index.node) - { - vkd3d_string_buffer_printf(buffer, ", sample index = "); -@@ -3296,9 +3301,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - } - } - --static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, -+static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, - const struct hlsl_profile_info *profile, struct vkd3d_shader_message_context *message_context) - { -+ unsigned int i; -+ - memset(ctx, 0, sizeof(*ctx)); - - ctx->profile = profile; -@@ -3307,7 +3314,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, - - if (!(ctx->source_files = hlsl_alloc(ctx, sizeof(*ctx->source_files)))) - return false; -- if (!(ctx->source_files[0] = hlsl_strdup(ctx, source_name ? source_name : ""))) -+ if (!(ctx->source_files[0] = hlsl_strdup(ctx, compile_info->source_name ? 
compile_info->source_name : ""))) - { - vkd3d_free(ctx->source_files); - return false; -@@ -3346,6 +3353,19 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, - return false; - ctx->cur_buffer = ctx->globals_buffer; - -+ for (i = 0; i < compile_info->option_count; ++i) -+ { -+ const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; -+ -+ if (option->name == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER) -+ { -+ if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) -+ ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; -+ else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) -+ ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; -+ } -+ } -+ - return true; - } - -@@ -3380,6 +3400,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - vkd3d_free((void *)buffer->name); - vkd3d_free(buffer); - } -+ -+ vkd3d_free(ctx->constant_defs.regs); - } - - int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, -@@ -3421,7 +3443,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d - return VKD3D_ERROR_INVALID_ARGUMENT; - } - -- if (!hlsl_ctx_init(&ctx, compile_info->source_name, profile, message_context)) -+ if (!hlsl_ctx_init(&ctx, compile_info, profile, message_context)) - return VKD3D_ERROR_OUT_OF_MEMORY; - - if ((ret = hlsl_lexer_compile(&ctx, hlsl)) == 2) -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 1a4b995abbf..b1928312066 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -634,6 +634,8 @@ enum hlsl_resource_load_type - HLSL_RESOURCE_GATHER_GREEN, - HLSL_RESOURCE_GATHER_BLUE, - HLSL_RESOURCE_GATHER_ALPHA, -+ HLSL_RESOURCE_SAMPLE_INFO, -+ HLSL_RESOURCE_RESINFO, - }; - - struct hlsl_ir_resource_load -@@ -1074,7 +1076,7 @@ struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const - 
struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); - const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); - --struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, -+struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); - void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); - bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 6bf87f8f916..0695f7864bf 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -146,14 +146,6 @@ static struct list *block_to_list(struct hlsl_block *block) - return &block->instrs; - } - --static struct hlsl_block *list_to_block(struct list *list) --{ -- /* This is a temporary hack to ease the transition from lists to blocks. -- * It takes advantage of the fact that an allocated hlsl_block pointer is -- * byte-compatible with an allocated list pointer. 
*/ -- return CONTAINING_RECORD(list, struct hlsl_block, instrs); --} -- - static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) - { - struct hlsl_block *block; -@@ -172,12 +164,6 @@ static struct list *make_empty_list(struct hlsl_ctx *ctx) - return list; - } - --static void destroy_instr_list(struct list *list) --{ -- hlsl_free_instr_list(list); -- vkd3d_free(list); --} -- - static void destroy_block(struct hlsl_block *block) - { - hlsl_block_cleanup(block); -@@ -308,7 +294,7 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ - return hlsl_types_are_componentwise_equal(ctx, src, dst); - } - --static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) - { - struct hlsl_type *src_type = node->data_type; -@@ -345,7 +331,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - { - struct hlsl_ir_node *component_load; - struct hlsl_type *dst_comp_type; -- struct hlsl_block block; -+ struct hlsl_block store_block; - unsigned int src_idx; - - if (broadcast) -@@ -365,21 +351,21 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - - dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); - -- if (!(component_load = hlsl_add_load_component(ctx, instrs, node, src_idx, loc))) -+ if (!(component_load = hlsl_add_load_component(ctx, block_to_list(block), node, src_idx, loc))) - return NULL; - - if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) - return NULL; -- list_add_tail(instrs, &cast->entry); -+ hlsl_block_add_instr(block, cast); - -- if (!hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, cast)) -+ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast)) - return NULL; -- list_move_tail(instrs, 
&block.instrs); -+ hlsl_block_add_block(block, &store_block); - } - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - - return &load->node; - } -@@ -387,12 +373,12 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - { - if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) - return NULL; -- list_add_tail(instrs, &cast->entry); -+ hlsl_block_add_instr(block, cast); - return cast; - } - } - --static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) - { - struct hlsl_type *src_type = node->data_type; -@@ -418,7 +404,7 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct - hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", - src_type->class == HLSL_CLASS_VECTOR ? 
"vector" : "matrix"); - -- return add_cast(ctx, instrs, node, dst_type, loc); -+ return add_cast(ctx, block, node, dst_type, loc); - } - - static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, -@@ -665,7 +651,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, - { - struct hlsl_ir_node *store; - -- if (!(return_value = add_implicit_conversion(ctx, block_to_list(block), return_value, return_type, loc))) -+ if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc))) - return false; - - if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) -@@ -736,7 +722,7 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc); - --static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, -+static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *array, - struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; -@@ -759,13 +745,13 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h - return false; - } - -- if (!(index = add_implicit_conversion(ctx, instrs, index, -+ if (!(index = add_implicit_conversion(ctx, block, index, - hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) - return false; - - if (!(return_index = hlsl_new_index(ctx, array, index, loc))) - return false; -- list_add_tail(instrs, &return_index->entry); -+ hlsl_block_add_instr(block, return_index); - - return true; - } -@@ -778,7 +764,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h - - if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) - return 
false; -- list_add_tail(instrs, &cast->entry); -+ hlsl_block_add_instr(block, cast); - index = cast; - - if (expr_type->class != HLSL_CLASS_ARRAY && expr_type->class != HLSL_CLASS_VECTOR && expr_type->class != HLSL_CLASS_MATRIX) -@@ -792,7 +778,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h - - if (!(return_index = hlsl_new_index(ctx, array, index, loc))) - return false; -- list_add_tail(instrs, &return_index->entry); -+ hlsl_block_add_instr(block, return_index); - - return true; - } -@@ -1117,17 +1103,17 @@ static struct hlsl_ir_function_decl *get_func_decl(struct rb_tree *funcs, - return NULL; - } - --static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) -+static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr) - { -- struct list *list; -+ struct hlsl_block *block; - -- if (!(list = make_empty_list(ctx))) -+ if (!(block = make_empty_block(ctx))) - { -- hlsl_free_instr(node); -+ hlsl_free_instr(instr); - return NULL; - } -- list_add_tail(list, &node->entry); -- return list; -+ hlsl_block_add_instr(block, instr); -+ return block; - } - - static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -1165,7 +1151,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - return 0; - hlsl_block_add_block(&expr, block); - -- if (!add_implicit_conversion(ctx, &expr.instrs, node_from_block(&expr), -+ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), - hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) - { - hlsl_block_cleanup(&expr); -@@ -1323,7 +1309,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct - return true; - } - --static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node 
*operands[HLSL_MAX_OPERANDS], - struct hlsl_type *type, const struct vkd3d_shader_location *loc) - { -@@ -1347,38 +1333,38 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, - for (i = 0; i < type->dimy * type->dimx; ++i) - { - struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; -- struct hlsl_block block; -+ struct hlsl_block store_block; - unsigned int j; - - for (j = 0; j < HLSL_MAX_OPERANDS; j++) - { - if (operands[j]) - { -- if (!(load = hlsl_add_load_component(ctx, instrs, operands[j], i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, block_to_list(block), operands[j], i, loc))) - return NULL; - - cell_operands[j] = load; - } - } - -- if (!(value = add_expr(ctx, instrs, op, cell_operands, scalar_type, loc))) -+ if (!(value = add_expr(ctx, block, op, cell_operands, scalar_type, loc))) - return NULL; - -- if (!hlsl_new_store_component(ctx, &block, &var_deref, i, value)) -+ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, value)) - return NULL; -- list_move_tail(instrs, &block.instrs); -+ hlsl_block_add_block(block, &store_block); - } - - if (!(var_load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -- list_add_tail(instrs, &var_load->node.entry); -+ hlsl_block_add_instr(block, &var_load->node); - - return &var_load->node; - } - - if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) - return NULL; -- list_add_tail(instrs, &expr->entry); -+ hlsl_block_add_instr(block, expr); - - return expr; - } -@@ -1409,7 +1395,7 @@ static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, stru - { - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; - -- return add_expr(ctx, block_to_list(block), op, args, arg->data_type, loc); -+ return add_expr(ctx, block, op, args, arg->data_type, loc); - } - - static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -1429,10 +1415,10 @@ static struct hlsl_ir_node 
*add_unary_logical_expr(struct hlsl_ctx *ctx, struct - bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, - arg->data_type->dimx, arg->data_type->dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, block_to_list(block), arg, bool_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) - return NULL; - -- return add_expr(ctx, block_to_list(block), op, args, bool_type, loc); -+ return add_expr(ctx, block, op, args, bool_type, loc); - } - - static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, -@@ -1457,13 +1443,13 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str - - common_type = get_common_numeric_type(ctx, arg1, arg2, loc); - -- if (!(args[0] = add_implicit_conversion(ctx, block_to_list(block), arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, block_to_list(block), arg2, common_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, block_to_list(block), op, args, common_type, loc); -+ return add_expr(ctx, block, op, args, common_type, loc); - } - - static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -1492,13 +1478,13 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str - common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, block_to_list(block), arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, block_to_list(block), arg2, common_type, loc))) -+ if (!(args[1] = 
add_implicit_conversion(ctx, block, arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, block_to_list(block), op, args, return_type, loc); -+ return add_expr(ctx, block, op, args, return_type, loc); - } - - static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -1515,13 +1501,13 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct - - common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, block_to_list(block), arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, block_to_list(block), arg2, common_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, block_to_list(block), op, args, common_type, loc); -+ return add_expr(ctx, block, op, args, common_type, loc); - } - - static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -1546,13 +1532,13 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct h - return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, block_to_list(block), arg1, return_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, return_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, block_to_list(block), arg2, integer_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, integer_type, loc))) - return NULL; - -- return add_expr(ctx, block_to_list(block), op, args, return_type, loc); -+ return add_expr(ctx, block, op, args, return_type, loc); - } - - static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, 
struct hlsl_block *instrs, -@@ -1599,13 +1585,13 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls - common_type = hlsl_get_vector_type(ctx, base, dim); - ret_type = hlsl_get_scalar_type(ctx, base); - -- if (!(args[0] = add_implicit_conversion(ctx, block_to_list(instrs), arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, block_to_list(instrs), arg2, common_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, block_to_list(instrs), op, args, ret_type, loc); -+ return add_expr(ctx, instrs, op, args, ret_type, loc); - } - - static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, -@@ -1711,7 +1697,7 @@ static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsig - return true; - } - --static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *lhs, -+static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, - enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) - { - struct hlsl_type *lhs_type = lhs->data_type; -@@ -1720,7 +1706,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - - if (assign_op == ASSIGN_OP_SUB) - { -- if (!(rhs = add_unary_arithmetic_expr(ctx, list_to_block(instrs), HLSL_OP1_NEG, rhs, &rhs->loc))) -+ if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc))) - return NULL; - assign_op = ASSIGN_OP_ADD; - } -@@ -1729,14 +1715,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - enum hlsl_ir_expr_op op = op_from_assignment(assign_op); - - assert(op); -- if (!(rhs = add_binary_arithmetic_expr(ctx, list_to_block(instrs), op, lhs, rhs, &rhs->loc))) -+ if 
(!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) - return NULL; - } - - if (lhs_type->class <= HLSL_CLASS_LAST_NUMERIC) - writemask = (1 << lhs_type->dimx) - 1; - -- if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) -+ if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) - return NULL; - - while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) -@@ -1765,7 +1751,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - { - return NULL; - } -- list_add_tail(instrs, &new_swizzle->entry); -+ hlsl_block_add_instr(block, new_swizzle); - - lhs = swizzle->val.node; - rhs = new_swizzle; -@@ -1811,7 +1797,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - hlsl_cleanup_deref(&resource_deref); - return NULL; - } -- list_add_tail(instrs, &store->entry); -+ hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&resource_deref); - } - else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) -@@ -1830,13 +1816,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - - if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) - return NULL; -- list_add_tail(instrs, &c->entry); -+ hlsl_block_add_instr(block, c); - - if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) - return NULL; -- list_add_tail(instrs, &cell->entry); -+ hlsl_block_add_instr(block, cell); - -- if (!(load = hlsl_add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) -+ if (!(load = hlsl_add_load_component(ctx, block_to_list(block), rhs, k++, &rhs->loc))) - return NULL; - - if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) -@@ -1847,7 +1833,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - hlsl_cleanup_deref(&deref); - return NULL; - } -- list_add_tail(instrs, &store->entry); -+ hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&deref); - } - } 
-@@ -1864,7 +1850,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - hlsl_cleanup_deref(&deref); - return NULL; - } -- list_add_tail(instrs, &store->entry); -+ hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&deref); - } - -@@ -1873,7 +1859,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - * the last instruction in the list, we do need to copy. */ - if (!(copy = hlsl_new_copy(ctx, rhs))) - return NULL; -- list_add_tail(instrs, &copy->entry); -+ hlsl_block_add_instr(block, copy); - return copy; - } - -@@ -1891,7 +1877,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d - return false; - hlsl_block_add_instr(block, one); - -- if (!add_assignment(ctx, block_to_list(block), lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) -+ if (!add_assignment(ctx, block, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) - return false; - - if (post) -@@ -1930,7 +1916,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - - dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - -- if (!(conv = add_implicit_conversion(ctx, block_to_list(instrs), load, dst_comp_type, &src->loc))) -+ if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) - return; - - if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) -@@ -2183,14 +2169,14 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - } - } - --static struct list *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) -+static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) - { - struct parse_variable_def *v, *v_next; -- struct list *statements_list; -+ struct hlsl_block *initializers; - struct hlsl_ir_var *var; - struct hlsl_type *type; - -- if (!(statements_list = make_empty_list(ctx))) -+ if (!(initializers = make_empty_block(ctx))) - { - 
LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) - { -@@ -2239,13 +2225,13 @@ static struct list *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) - - assert(v->initializer.args_count == 1); - hlsl_block_add_instr(v->initializer.instrs, &load->node); -- add_assignment(ctx, block_to_list(v->initializer.instrs), &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); -+ add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); - } - - if (var->storage_modifiers & HLSL_STORAGE_STATIC) - hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); - else -- list_move_tail(statements_list, &v->initializer.instrs->instrs); -+ hlsl_block_add_block(initializers, v->initializer.instrs); - } - else if (var->storage_modifiers & HLSL_STORAGE_STATIC) - { -@@ -2266,7 +2252,7 @@ static struct list *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) - } - hlsl_block_add_instr(&ctx->static_initializers, zero); - -- if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) -+ if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) - { - free_parse_variable_def(v); - continue; -@@ -2283,7 +2269,7 @@ static struct list *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) - } - - vkd3d_free(var_list); -- return statements_list; -+ return initializers; - } - - struct find_function_call_args -@@ -2364,7 +2350,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, - return arg; - - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -- return add_implicit_conversion(ctx, block_to_list(params->instrs), arg, type, loc); -+ return add_implicit_conversion(ctx, params->instrs, arg, type, loc); - } - - static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, -@@ -2376,7 +2362,7 @@ static bool convert_args(struct 
hlsl_ctx *ctx, const struct parse_initializer *p - { - struct hlsl_ir_node *new_arg; - -- if (!(new_arg = add_implicit_conversion(ctx, block_to_list(params->instrs), params->args[i], type, loc))) -+ if (!(new_arg = add_implicit_conversion(ctx, params->instrs, params->args[i], type, loc))) - return false; - params->args[i] = new_arg; - } -@@ -2569,7 +2555,7 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, - data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); - - operands[0] = params->args[0]; -- return add_expr(ctx, block_to_list(params->instrs), HLSL_OP1_REINTERPRET, operands, data_type, loc); -+ return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); - } - - static bool intrinsic_asuint(struct hlsl_ctx *ctx, -@@ -2605,7 +2591,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, - data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); - - operands[0] = params->args[0]; -- return add_expr(ctx, block_to_list(params->instrs), HLSL_OP1_REINTERPRET, operands, data_type, loc); -+ return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); - } - - static bool intrinsic_clamp(struct hlsl_ctx *ctx, -@@ -2677,10 +2663,10 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, - - cast_type = hlsl_get_vector_type(ctx, base, 3); - -- if (!(arg1_cast = add_implicit_conversion(ctx, block_to_list(params->instrs), arg1, cast_type, loc))) -+ if (!(arg1_cast = add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc))) - return false; - -- if (!(arg2_cast = add_implicit_conversion(ctx, block_to_list(params->instrs), arg2, cast_type, loc))) -+ if (!(arg2_cast = add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc))) - return false; - - if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) -@@ -2879,7 +2865,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer - if (!(ge = add_binary_comparison_expr(ctx, 
params->instrs, HLSL_OP2_GEQUAL, div, zero, loc))) - return false; - -- if (!(select = hlsl_add_conditional(ctx, block_to_list(params->instrs), ge, frac, neg_frac))) -+ if (!(select = hlsl_add_conditional(ctx, params->instrs, ge, frac, neg_frac))) - return false; - - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc); -@@ -3042,7 +3028,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, - if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) - return false; - -- if (!(load = hlsl_add_conditional(ctx, block_to_list(params->instrs), specular_or, zero, specular_pow))) -+ if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, zero, specular_pow))) - return false; - - if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) -@@ -3163,10 +3149,10 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - ret_type = hlsl_get_scalar_type(ctx, base); - } - -- if (!(cast1 = add_implicit_conversion(ctx, block_to_list(params->instrs), arg1, cast_type1, loc))) -+ if (!(cast1 = add_implicit_conversion(ctx, params->instrs, arg1, cast_type1, loc))) - return false; - -- if (!(cast2 = add_implicit_conversion(ctx, block_to_list(params->instrs), arg2, cast_type2, loc))) -+ if (!(cast2 = add_implicit_conversion(ctx, params->instrs, arg2, cast_type2, loc))) - return false; - - if (!(var = hlsl_new_synthetic_var(ctx, "mul", matrix_type, loc))) -@@ -3216,7 +3202,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - return false; - hlsl_block_add_instr(params->instrs, &load->node); - -- return !!add_implicit_conversion(ctx, block_to_list(params->instrs), &load->node, ret_type, loc); -+ return !!add_implicit_conversion(ctx, params->instrs, &load->node, ret_type, loc); - } - - static bool intrinsic_normalize(struct hlsl_ctx *ctx, -@@ -3328,7 +3314,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, - if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, zero, arg, loc))) - return false; - -- if (!(op1 
= add_implicit_conversion(ctx, block_to_list(params->instrs), lt, int_type, loc))) -+ if (!(op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) - return false; - - /* Check if arg < 0, cast bool to int and invert (meaning true is -1) */ -@@ -3336,7 +3322,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, - if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, zero, loc))) - return false; - -- if (!(op2 = add_implicit_conversion(ctx, block_to_list(params->instrs), lt, int_type, loc))) -+ if (!(op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) - return false; - - if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) -@@ -3441,7 +3427,7 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, - - type = ge->data_type; - type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -- return !!add_implicit_conversion(ctx, block_to_list(params->instrs), ge, type, loc); -+ return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); - } - - static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, -@@ -3476,7 +3462,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - hlsl_release_string_buffer(ctx, string); - } - -- if (!(coords = add_implicit_conversion(ctx, block_to_list(params->instrs), params->args[1], -+ if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) - coords = params->args[1]; - -@@ -3684,7 +3670,14 @@ static int intrinsic_function_name_compare(const void *a, const void *b) - return strcmp(a, func->name); - } - --static struct list *add_call(struct hlsl_ctx *ctx, const char *name, -+static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ -+ 
return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); -+} -+ -+static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, - struct parse_initializer *args, const struct vkd3d_shader_location *loc) - { - struct intrinsic_function *intrinsic; -@@ -3706,7 +3699,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - { - struct hlsl_ir_node *cast; - -- if (!(cast = add_cast(ctx, block_to_list(args->instrs), arg, param->data_type, &arg->loc))) -+ if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) - goto fail; - args->args[i] = cast; - arg = cast; -@@ -3743,7 +3736,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - goto fail; - hlsl_block_add_instr(args->instrs, &load->node); - -- if (!add_assignment(ctx, block_to_list(args->instrs), arg, ASSIGN_OP_ASSIGN, &load->node)) -+ if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) - goto fail; - } - } -@@ -3758,10 +3751,9 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - } - else - { -- struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_ir_node *expr; - -- if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc))) -+ if (!(expr = hlsl_new_void_expr(ctx, loc))) - goto fail; - hlsl_block_add_instr(args->instrs, expr); - } -@@ -3812,7 +3804,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - goto fail; - } - vkd3d_free(args->args); -- return block_to_list(args->instrs); -+ return args->instrs; - - fail: - free_parse_initializer(args); -@@ -3890,7 +3882,7 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct - return false; - } - --static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const 
struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -3918,7 +3910,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru - } - if (multisampled) - { -- if (!(load_params.sample_index = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.sample_index = add_implicit_conversion(ctx, block, params->args[1], - hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) - return false; - } -@@ -3926,7 +3918,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru - assert(offset_dim); - if (params->args_count > 1 + multisampled) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[1 + multisampled], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -@@ -3936,7 +3928,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru - } - - /* +1 for the mipmap level for non-multisampled textures */ -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) - return false; - -@@ -3945,11 +3937,11 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); - return true; - } - --static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const 
struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -3986,13 +3978,13 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, st - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; - - if (offset_dim && params->args_count > 2) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -@@ -4008,12 +4000,12 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, st - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); - - return true; - } - --static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -4056,17 +4048,17 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; - -- if (!(load_params.cmp = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.cmp = add_implicit_conversion(ctx, block, 
params->args[2], - hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) - load_params.cmp = params->args[2]; - - if (offset_dim && params->args_count > 3) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -@@ -4082,12 +4074,12 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); - - return true; - } - --static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -4154,7 +4146,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st - } - else if (offset_dim && params->args_count > 2) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -@@ -4179,7 +4171,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; - -@@ -4189,11 +4181,187 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, 
st - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); -+ return true; -+} -+ -+static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest, -+ struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *load; -+ -+ if (!dest) -+ return true; -+ -+ if (!(load = hlsl_add_load_component(ctx, block_to_list(instrs), src, component, loc))) -+ return false; -+ -+ if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load)) -+ return false; -+ - return true; - } - --static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ const struct hlsl_type *object_type = object->data_type; -+ bool uint_resinfo, has_uint_arg, has_float_arg; -+ struct hlsl_resource_load_params load_params; -+ struct hlsl_ir_node *sample_info, *res_info; -+ struct hlsl_ir_node *zero = NULL, *void_ret; -+ struct hlsl_type *uint_type, *float_type; -+ unsigned int i, j; -+ enum func_argument -+ { -+ ARG_MIP_LEVEL, -+ ARG_WIDTH, -+ ARG_HEIGHT, -+ ARG_ELEMENT_COUNT, -+ ARG_LEVEL_COUNT, -+ ARG_SAMPLE_COUNT, -+ ARG_MAX_ARGS, -+ }; -+ struct hlsl_ir_node *args[ARG_MAX_ARGS] = { 0 }; -+ static const struct overload -+ { -+ enum hlsl_sampler_dim sampler_dim; -+ unsigned int args_count; -+ enum func_argument args[ARG_MAX_ARGS]; -+ } -+ overloads[] = -+ { -+ { HLSL_SAMPLER_DIM_1D, 1, { ARG_WIDTH } }, -+ { HLSL_SAMPLER_DIM_1D, 3, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_1DARRAY, 2, { ARG_WIDTH, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_1DARRAY, 4, { ARG_MIP_LEVEL, ARG_WIDTH, 
ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_2D, 2, { ARG_WIDTH, ARG_HEIGHT } }, -+ { HLSL_SAMPLER_DIM_2D, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_3D, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_3D, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_CUBE, 2, { ARG_WIDTH, ARG_HEIGHT } }, -+ { HLSL_SAMPLER_DIM_CUBE, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_CUBEARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_CUBEARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DMS, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_SAMPLE_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DMSARRAY, 4, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_SAMPLE_COUNT } }, -+ }; -+ const struct overload *o = NULL; -+ -+ if (object_type->sampler_dim > HLSL_SAMPLER_DIM_LAST_TEXTURE) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "GetDimensions() is not defined for this type."); -+ } -+ -+ uint_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); -+ float_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT); -+ has_uint_arg = has_float_arg = false; -+ for (i = 0; i < ARRAY_SIZE(overloads); ++i) -+ { -+ const struct overload *iter = &overloads[i]; -+ -+ if (iter->sampler_dim == object_type->sampler_dim && iter->args_count == params->args_count) -+ { -+ for (j = 0; j < params->args_count; ++j) -+ { -+ args[iter->args[j]] = params->args[j]; -+ -+ /* Input parameter. 
*/ -+ if (iter->args[j] == ARG_MIP_LEVEL) -+ { -+ if (!(args[ARG_MIP_LEVEL] = add_implicit_conversion(ctx, block, args[ARG_MIP_LEVEL], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -+ { -+ return false; -+ } -+ -+ continue; -+ } -+ -+ has_float_arg |= hlsl_types_are_equal(params->args[j]->data_type, float_type); -+ has_uint_arg |= hlsl_types_are_equal(params->args[j]->data_type, uint_type); -+ -+ if (params->args[j]->data_type->class != HLSL_CLASS_SCALAR) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected scalar arguments."); -+ break; -+ } -+ } -+ o = iter; -+ break; -+ } -+ } -+ uint_resinfo = !has_float_arg && has_uint_arg; -+ -+ if (!o) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, object_type))) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Unexpected number of arguments %u for %s.%s().", params->args_count, string->buffer, name); -+ hlsl_release_string_buffer(ctx, string); -+ } -+ } -+ -+ if (!args[ARG_MIP_LEVEL]) -+ { -+ if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) -+ return false; -+ hlsl_block_add_instr(block, zero); -+ args[ARG_MIP_LEVEL] = zero; -+ } -+ -+ memset(&load_params, 0, sizeof(load_params)); -+ load_params.type = HLSL_RESOURCE_RESINFO; -+ load_params.resource = object; -+ load_params.lod = args[ARG_MIP_LEVEL]; -+ load_params.format = hlsl_get_vector_type(ctx, uint_resinfo ? HLSL_TYPE_UINT : HLSL_TYPE_FLOAT, 4); -+ -+ if (!(res_info = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, res_info); -+ -+ if (!add_assignment_from_component(ctx, block, args[ARG_WIDTH], res_info, 0, loc)) -+ return false; -+ -+ if (!add_assignment_from_component(ctx, block, args[ARG_HEIGHT], res_info, 1, loc)) -+ return false; -+ -+ if (!add_assignment_from_component(ctx, block, args[ARG_ELEMENT_COUNT], res_info, -+ object_type->sampler_dim == HLSL_SAMPLER_DIM_1DARRAY ? 
1 : 2, loc)) -+ { -+ return false; -+ } -+ -+ if (!add_assignment_from_component(ctx, block, args[ARG_LEVEL_COUNT], res_info, 3, loc)) -+ return false; -+ -+ if (args[ARG_SAMPLE_COUNT]) -+ { -+ memset(&load_params, 0, sizeof(load_params)); -+ load_params.type = HLSL_RESOURCE_SAMPLE_INFO; -+ load_params.resource = object; -+ load_params.format = args[ARG_SAMPLE_COUNT]->data_type; -+ if (!(sample_info = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, sample_info); -+ -+ if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info)) -+ return false; -+ } -+ -+ if (!(void_ret = hlsl_new_void_expr(ctx, loc))) -+ return false; -+ hlsl_block_add_instr(block, void_ret); -+ -+ return true; -+} -+ -+static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -4235,17 +4403,17 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - load_params.coords = params->args[1]; - -- if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.lod = add_implicit_conversion(ctx, block, params->args[2], - hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) - load_params.lod = params->args[2]; - - if (offset_dim && params->args_count > 3) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) 
- return false; - } -@@ -4259,11 +4427,11 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); - return true; - } - --static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -4302,21 +4470,21 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instr - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - load_params.coords = params->args[1]; - -- if (!(load_params.ddx = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.ddx = add_implicit_conversion(ctx, block, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - load_params.ddx = params->args[2]; - -- if (!(load_params.ddy = add_implicit_conversion(ctx, instrs, params->args[3], -+ if (!(load_params.ddy = add_implicit_conversion(ctx, block, params->args[3], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - load_params.ddy = params->args[3]; - - if (offset_dim && params->args_count > 4) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[4], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[4], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -@@ -4330,14 +4498,14 @@ static bool 
add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instr - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); - return true; - } - - static const struct method_function - { - const char *name; -- bool (*handler)(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+ bool (*handler)(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); - } - object_methods[] = -@@ -4348,6 +4516,8 @@ object_methods[] = - { "GatherGreen", add_gather_method_call }, - { "GatherRed", add_gather_method_call }, - -+ { "GetDimensions", add_getdimensions_method_call }, -+ - { "Load", add_load_method_call }, - - { "Sample", add_sample_method_call }, -@@ -4365,7 +4535,7 @@ static int object_method_function_name_compare(const void *a, const void *b) - return strcmp(a, func->name); - } - --static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -4386,7 +4556,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl - if ((method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), - sizeof(*method), object_method_function_name_compare))) - { -- return method->handler(ctx, instrs, object, name, params, loc); -+ return method->handler(ctx, block, object, name, params, loc); - } - else - { -@@ -4557,10 +4727,6 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %token C_INTEGER - %token PRE_LINE - --%type declaration --%type declaration_statement --%type 
primary_expr --%type struct_declaration_without_vars - %type type_specs - %type variables_def - %type variables_def_typed -@@ -4586,6 +4752,8 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %type bitxor_expr - %type compound_statement - %type conditional_expr -+%type declaration -+%type declaration_statement - %type equality_expr - %type expr - %type expr_optional -@@ -4597,11 +4765,13 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %type loop_statement - %type mul_expr - %type postfix_expr -+%type primary_expr - %type relational_expr - %type shift_expr - %type selection_statement - %type statement - %type statement_list -+%type struct_declaration_without_vars - %type unary_expr - - %type boolean -@@ -4661,9 +4831,9 @@ hlsl_prog: - | hlsl_prog buffer_declaration buffer_body - | hlsl_prog declaration_statement - { -- if (!list_empty($2)) -+ if (!list_empty(&$2->instrs)) - hlsl_fixme(ctx, &@2, "Uniform initializer."); -- destroy_instr_list($2); -+ destroy_block($2); - } - | hlsl_prog preproc_directive - | hlsl_prog ';' -@@ -4731,7 +4901,7 @@ struct_declaration_without_vars: - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers are not allowed on struct type declarations."); - -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - } - -@@ -5515,7 +5685,7 @@ declaration_statement: - | struct_declaration_without_vars - | typedef - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - } - -@@ -5912,9 +6082,6 @@ statement_list: - - statement: - declaration_statement -- { -- $$ = list_to_block($1); -- } - | expr_statement - | compound_statement - | jump_statement -@@ -6006,7 +6173,7 @@ loop_statement: - } - | attribute_list_optional KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement - { -- $$ = create_loop(ctx, LOOP_FOR, &$1, list_to_block($5), $6, $7, $9, &@2); -+ $$ = 
create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); - hlsl_pop_scope(ctx); - } - -@@ -6042,7 +6209,7 @@ primary_expr: - - if (!(c = hlsl_new_float_constant(ctx, $1, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, c))) -+ if (!($$ = make_block(ctx, c))) - YYABORT; - } - | C_INTEGER -@@ -6051,7 +6218,7 @@ primary_expr: - - if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, c))) -+ if (!($$ = make_block(ctx, c))) - YYABORT; - } - | boolean -@@ -6060,7 +6227,7 @@ primary_expr: - - if (!(c = hlsl_new_bool_constant(ctx, $1, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, c))) -+ if (!($$ = make_block(ctx, c))) - { - hlsl_free_instr(c); - YYABORT; -@@ -6078,12 +6245,12 @@ primary_expr: - } - if (!(load = hlsl_new_var_load(ctx, var, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, &load->node))) -+ if (!($$ = make_block(ctx, &load->node))) - YYABORT; - } - | '(' expr ')' - { -- $$ = block_to_list($2); -+ $$ = $2; - } - | var_identifier '(' func_arguments ')' - { -@@ -6106,7 +6273,7 @@ primary_expr: - YYABORT; - if (!(load = hlsl_new_var_load(ctx, var, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, &load->node))) -+ if (!($$ = make_block(ctx, &load->node))) - YYABORT; - } - else -@@ -6118,9 +6285,6 @@ primary_expr: - - postfix_expr: - primary_expr -- { -- $$ = list_to_block($1); -- } - | postfix_expr OP_INC - { - if (!add_increment(ctx, $1, false, true, &@2)) -@@ -6185,7 +6349,7 @@ postfix_expr: - hlsl_block_add_block($3, $1); - destroy_block($1); - -- if (!add_array_access(ctx, block_to_list($3), array, index, &@2)) -+ if (!add_array_access(ctx, $3, array, index, &@2)) - { - destroy_block($3); - YYABORT; -@@ -6236,7 +6400,7 @@ postfix_expr: - hlsl_block_add_block($1, $5.instrs); - vkd3d_free($5.instrs); - -- if (!add_method_call(ctx, block_to_list($1), object, $3, &$5, &@3)) -+ if (!add_method_call(ctx, $1, object, $3, &$5, &@3)) - { - destroy_block($1); - vkd3d_free($5.args); -@@ -6324,7 +6488,7 @@ unary_expr: - YYABORT; - } - -- if 
(!add_cast(ctx, block_to_list($6), node_from_block($6), dst_type, &@3)) -+ if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3)) - { - destroy_block($6); - YYABORT; -@@ -6455,13 +6619,13 @@ conditional_expr: - if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) - YYABORT; - -- if (!(first = add_implicit_conversion(ctx, block_to_list($1), first, common_type, &@3))) -+ if (!(first = add_implicit_conversion(ctx, $1, first, common_type, &@3))) - YYABORT; - -- if (!(second = add_implicit_conversion(ctx, block_to_list($1), second, common_type, &@5))) -+ if (!(second = add_implicit_conversion(ctx, $1, second, common_type, &@5))) - YYABORT; - -- if (!hlsl_add_conditional(ctx, block_to_list($1), cond, first, second)) -+ if (!hlsl_add_conditional(ctx, $1, cond, first, second)) - YYABORT; - $$ = $1; - } -@@ -6480,7 +6644,7 @@ assignment_expr: - } - hlsl_block_add_block($3, $1); - destroy_block($1); -- if (!add_assignment(ctx, block_to_list($3), lhs, $2, rhs)) -+ if (!add_assignment(ctx, $3, lhs, $2, rhs)) - YYABORT; - $$ = $3; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 09a3ea4ca08..4f5a5b02a67 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -574,6 +574,37 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, - return progress; - } - -+typedef bool (*PFN_lower_func)(struct hlsl_ctx *, struct hlsl_ir_node *, struct hlsl_block *); -+ -+static bool call_lower_func(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ PFN_lower_func func = context; -+ struct hlsl_block block; -+ -+ hlsl_block_init(&block); -+ if (func(ctx, instr, &block)) -+ { -+ struct hlsl_ir_node *replacement = LIST_ENTRY(list_tail(&block.instrs), struct hlsl_ir_node, entry); -+ -+ list_move_before(&instr->entry, &block.instrs); -+ hlsl_replace_node(instr, replacement); -+ return true; -+ } -+ else -+ { -+ 
hlsl_block_cleanup(&block); -+ return false; -+ } -+} -+ -+/* Specific form of transform_ir() for passes which convert a single instruction -+ * to a block of one or more instructions. This helper takes care of setting up -+ * the block and calling hlsl_replace_node_with_block(). */ -+static bool lower_ir(struct hlsl_ctx *ctx, PFN_lower_func func, struct hlsl_block *block) -+{ -+ return hlsl_transform_ir(ctx, call_lower_func, block, func); -+} -+ - static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - bool res; -@@ -2087,9 +2118,11 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - case HLSL_RESOURCE_GATHER_GREEN: - case HLSL_RESOURCE_GATHER_BLUE: - case HLSL_RESOURCE_GATHER_ALPHA: -+ case HLSL_RESOURCE_RESINFO: - case HLSL_RESOURCE_SAMPLE_CMP: - case HLSL_RESOURCE_SAMPLE_CMP_LZ: - case HLSL_RESOURCE_SAMPLE_GRAD: -+ case HLSL_RESOURCE_SAMPLE_INFO: - return false; - - case HLSL_RESOURCE_SAMPLE: -@@ -2356,7 +2389,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return true; - } - --struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, -+struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) - { - struct hlsl_block then_block, else_block; -@@ -2382,18 +2415,18 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *ins - - if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) - return NULL; -- list_add_tail(instrs, &iff->entry); -+ hlsl_block_add_instr(instrs, iff); - - if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) - return NULL; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(instrs, &load->node); - - return &load->node; - } - --static bool lower_int_division(struct hlsl_ctx *ctx, struct 
hlsl_ir_node *instr, void *context) -+static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -- struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; -+ struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; - struct hlsl_type *type = instr->data_type, *utype; - struct hlsl_constant_value high_bit_value; - struct hlsl_ir_expr *expr; -@@ -2414,56 +2447,52 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - - if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) - return false; -- list_add_before(&instr->entry, &xor->entry); -+ hlsl_block_add_instr(block, xor); - - for (i = 0; i < type->dimx; ++i) - high_bit_value.u[i].u = 0x80000000; - if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) - return false; -- list_add_before(&instr->entry, &high_bit->entry); -+ hlsl_block_add_instr(block, high_bit); - - if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, high_bit))) - return false; -- list_add_before(&instr->entry, &and->entry); -+ hlsl_block_add_instr(block, and); - - if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs1->entry); -+ hlsl_block_add_instr(block, abs1); - - if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast1->entry); -+ hlsl_block_add_instr(block, cast1); - - if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs2->entry); -+ hlsl_block_add_instr(block, abs2); - - if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast2->entry); -+ hlsl_block_add_instr(block, cast2); - - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, cast1, cast2))) 
- return false; -- list_add_before(&instr->entry, &div->entry); -+ hlsl_block_add_instr(block, div); - - if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast3->entry); -+ hlsl_block_add_instr(block, cast3); - - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) - return false; -- list_add_before(&instr->entry, &neg->entry); -- -- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) -- return false; -- hlsl_replace_node(instr, cond); -+ hlsl_block_add_instr(block, neg); - -- return true; -+ return hlsl_add_conditional(ctx, block, and, neg, cast3); - } - --static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -- struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; -+ struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; - struct hlsl_type *type = instr->data_type, *utype; - struct hlsl_constant_value high_bit_value; - struct hlsl_ir_expr *expr; -@@ -2486,45 +2515,41 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - high_bit_value.u[i].u = 0x80000000; - if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) - return false; -- list_add_before(&instr->entry, &high_bit->entry); -+ hlsl_block_add_instr(block, high_bit); - - if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, high_bit))) - return false; -- list_add_before(&instr->entry, &and->entry); -+ hlsl_block_add_instr(block, and); - - if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs1->entry); -+ hlsl_block_add_instr(block, abs1); - - if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) - return false; -- 
list_add_before(&instr->entry, &cast1->entry); -+ hlsl_block_add_instr(block, cast1); - - if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs2->entry); -+ hlsl_block_add_instr(block, abs2); - - if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast2->entry); -+ hlsl_block_add_instr(block, cast2); - - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, cast1, cast2))) - return false; -- list_add_before(&instr->entry, &div->entry); -+ hlsl_block_add_instr(block, div); - - if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast3->entry); -+ hlsl_block_add_instr(block, cast3); - - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) - return false; -- list_add_before(&instr->entry, &neg->entry); -- -- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) -- return false; -- hlsl_replace_node(instr, cond); -+ hlsl_block_add_instr(block, neg); - -- return true; -+ return hlsl_add_conditional(ctx, block, and, neg, cast3); - } - - static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -2608,9 +2633,9 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void - return false; - } - --static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -- struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one; -+ struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one, *mul3; - struct hlsl_type *type = instr->data_type, *btype; - struct hlsl_constant_value one_value; - struct hlsl_ir_expr *expr; -@@ -2631,47 +2656,45 @@ static bool lower_float_modulus(struct hlsl_ctx 
*ctx, struct hlsl_ir_node *instr - - if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) - return false; -- list_add_before(&instr->entry, &mul1->entry); -+ hlsl_block_add_instr(block, mul1); - - if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) - return false; -- list_add_before(&instr->entry, &neg1->entry); -+ hlsl_block_add_instr(block, neg1); - - if (!(ge = hlsl_new_binary_expr(ctx, HLSL_OP2_GEQUAL, mul1, neg1))) - return false; - ge->data_type = btype; -- list_add_before(&instr->entry, &ge->entry); -+ hlsl_block_add_instr(block, ge); - - if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) - return false; -- list_add_before(&instr->entry, &neg2->entry); -+ hlsl_block_add_instr(block, neg2); - -- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, ge, arg2, neg2))) -+ if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) - return false; - - for (i = 0; i < type->dimx; ++i) - one_value.u[i].f = 1.0f; - if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) - return false; -- list_add_before(&instr->entry, &one->entry); -+ hlsl_block_add_instr(block, one); - - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, one, cond))) - return false; -- list_add_before(&instr->entry, &div->entry); -+ hlsl_block_add_instr(block, div); - - if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, div, arg1))) - return false; -- list_add_before(&instr->entry, &mul2->entry); -+ hlsl_block_add_instr(block, mul2); - - if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, &instr->loc))) - return false; -- list_add_before(&instr->entry, &frc->entry); -+ hlsl_block_add_instr(block, frc); - -- expr->op = HLSL_OP2_MUL; -- hlsl_src_remove(&expr->operands[0]); -- hlsl_src_remove(&expr->operands[1]); -- hlsl_src_from_node(&expr->operands[0], frc); -- hlsl_src_from_node(&expr->operands[1], cond); -+ if (!(mul3 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, frc, cond))) -+ return false; -+ 
hlsl_block_add_instr(block, mul3); - - return true; - } -@@ -2683,8 +2706,8 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - struct hlsl_type *arg_type, *cmp_type; - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; - struct hlsl_ir_jump *jump; -+ struct hlsl_block block; - unsigned int i, count; -- struct list instrs; - - if (instr->type != HLSL_IR_JUMP) - return false; -@@ -2692,38 +2715,38 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) - return false; - -- list_init(&instrs); -+ hlsl_block_init(&block); - - arg_type = jump->condition.node->data_type; - if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) - return false; -- list_add_tail(&instrs, &zero->entry); -+ hlsl_block_add_instr(&block, zero); - - operands[0] = jump->condition.node; - operands[1] = zero; - cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); - if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) - return false; -- list_add_tail(&instrs, &cmp->entry); -+ hlsl_block_add_instr(&block, cmp); - - if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) - return false; -- list_add_tail(&instrs, &bool_false->entry); -+ hlsl_block_add_instr(&block, bool_false); - - or = bool_false; - - count = hlsl_type_component_count(cmp_type); - for (i = 0; i < count; ++i) - { -- if (!(load = hlsl_add_load_component(ctx, &instrs, cmp, i, &instr->loc))) -+ if (!(load = hlsl_add_load_component(ctx, &block.instrs, cmp, i, &instr->loc))) - return false; - - if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) - return NULL; -- list_add_tail(&instrs, &or->entry); -+ hlsl_block_add_instr(&block, or); - } - -- list_move_tail(&instr->entry, &instrs); -+ list_move_tail(&instr->entry, &block.instrs); - 
hlsl_src_remove(&jump->condition); - hlsl_src_from_node(&jump->condition, or); - jump->type = HLSL_IR_JUMP_DISCARD_NZ; -@@ -2953,7 +2976,8 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - load->sampler.offset.node->last_read = last_read; - } - -- load->coords.node->last_read = last_read; -+ if (load->coords.node) -+ load->coords.node->last_read = last_read; - if (load->texel_offset.node) - load->texel_offset.node->last_read = last_read; - if (load->lod.node) -@@ -4268,10 +4292,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); - hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); - hlsl_transform_ir(ctx, lower_int_dot, body, NULL); -- hlsl_transform_ir(ctx, lower_int_division, body, NULL); -- hlsl_transform_ir(ctx, lower_int_modulus, body, NULL); -+ lower_ir(ctx, lower_int_division, body); -+ lower_ir(ctx, lower_int_modulus, body); - hlsl_transform_ir(ctx, lower_int_abs, body, NULL); -- hlsl_transform_ir(ctx, lower_float_modulus, body, NULL); -+ lower_ir(ctx, lower_float_modulus, body); - hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - do - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 9eefb82c226..d74f81afc39 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -247,13 +247,13 @@ static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_s - reg->immconst_type = VKD3D_IMMCONST_SCALAR; - } - --static void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) -+void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) - { - memset(ins, 0, sizeof(*ins)); - ins->handler_idx = handler_idx; - } - --enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) -+static 
enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) - { - struct hull_flattener flattener = {*src_instructions}; - struct vkd3d_shader_instruction_array *instructions; -@@ -388,7 +388,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p - return VKD3D_OK; - } - --enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( -+static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( - struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) - { - struct vkd3d_shader_instruction_array *instructions; -@@ -999,7 +999,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - shader_instruction_init(ins, VKD3DSIH_NOP); - } - --enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, -+static enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, - enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, - struct shader_signature *output_signature, struct shader_signature *patch_constant_signature) - { -@@ -1070,3 +1070,154 @@ enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_i - *instructions = normaliser.instructions; - return VKD3D_OK; - } -+ -+struct flat_constant_def -+{ -+ enum vkd3d_shader_d3dbc_constant_register set; -+ uint32_t index; -+ uint32_t value[4]; -+}; -+ -+struct flat_constants_normaliser -+{ -+ struct vkd3d_shader_parser *parser; -+ struct flat_constant_def *defs; -+ size_t def_count, defs_capacity; -+}; -+ -+static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, -+ enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index) -+{ -+ static const struct -+ { -+ enum vkd3d_shader_register_type type; -+ enum vkd3d_shader_d3dbc_constant_register 
set; -+ uint32_t offset; -+ } -+ regs[] = -+ { -+ {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0}, -+ {VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048}, -+ {VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096}, -+ {VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144}, -+ {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0}, -+ {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0}, -+ }; -+ -+ unsigned int i; -+ -+ for (i = 0; i < ARRAY_SIZE(regs); ++i) -+ { -+ if (reg->type == regs[i].type) -+ { -+ if (reg->idx[0].rel_addr) -+ { -+ FIXME("Unhandled relative address.\n"); -+ return false; -+ } -+ -+ *set = regs[i].set; -+ *index = regs[i].offset + reg->idx[0].offset; -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_param *param, -+ const struct flat_constants_normaliser *normaliser) -+{ -+ enum vkd3d_shader_d3dbc_constant_register set; -+ uint32_t index; -+ size_t i, j; -+ -+ if (!get_flat_constant_register_type(¶m->reg, &set, &index)) -+ return; -+ -+ for (i = 0; i < normaliser->def_count; ++i) -+ { -+ if (normaliser->defs[i].set == set && normaliser->defs[i].index == index) -+ { -+ param->reg.type = VKD3DSPR_IMMCONST; -+ param->reg.idx_count = 0; -+ param->reg.immconst_type = VKD3D_IMMCONST_VEC4; -+ for (j = 0; j < 4; ++j) -+ param->reg.u.immconst_uint[j] = normaliser->defs[i].value[j]; -+ return; -+ } -+ } -+ -+ param->reg.type = VKD3DSPR_CONSTBUFFER; -+ param->reg.idx[0].offset = set; /* register ID */ -+ param->reg.idx[1].offset = set; /* register index */ -+ param->reg.idx[2].offset = index; /* buffer index */ -+ param->reg.idx_count = 3; -+} -+ -+static enum vkd3d_result instruction_array_normalise_flat_constants(struct vkd3d_shader_parser *parser) -+{ -+ struct flat_constants_normaliser normaliser = {.parser = parser}; -+ unsigned int i, j; -+ -+ for (i = 0; i < parser->instructions.count; 
++i) -+ { -+ struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i]; -+ -+ if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) -+ { -+ struct flat_constant_def *def; -+ -+ if (!vkd3d_array_reserve((void **)&normaliser.defs, &normaliser.defs_capacity, -+ normaliser.def_count + 1, sizeof(*normaliser.defs))) -+ { -+ vkd3d_free(normaliser.defs); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ def = &normaliser.defs[normaliser.def_count++]; -+ -+ get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index); -+ for (j = 0; j < 4; ++j) -+ def->value[j] = ins->src[0].reg.u.immconst_uint[j]; -+ -+ vkd3d_shader_instruction_make_nop(ins); -+ } -+ else -+ { -+ for (j = 0; j < ins->src_count; ++j) -+ shader_register_normalise_flat_constants((struct vkd3d_shader_src_param *)&ins->src[j], &normaliser); -+ } -+ } -+ -+ vkd3d_free(normaliser.defs); -+ return VKD3D_OK; -+} -+ -+enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &parser->instructions; -+ enum vkd3d_result result = VKD3D_OK; -+ -+ if (parser->shader_desc.is_dxil) -+ return result; -+ -+ if (parser->shader_version.type == VKD3D_SHADER_TYPE_HULL -+ && (result = instruction_array_flatten_hull_shader_phases(instructions)) >= 0) -+ { -+ result = instruction_array_normalise_hull_shader_control_point_io(instructions, -+ &parser->shader_desc.input_signature); -+ } -+ if (result >= 0) -+ result = instruction_array_normalise_io_registers(instructions, parser->shader_version.type, -+ &parser->shader_desc.input_signature, &parser->shader_desc.output_signature, -+ &parser->shader_desc.patch_constant_signature); -+ -+ if (result >= 0) -+ result = instruction_array_normalise_flat_constants(parser); -+ -+ if (result >= 0 && TRACE_ON()) -+ vkd3d_shader_trace(instructions, &parser->shader_version); -+ -+ return result; -+} -diff 
--git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index 94079696280..6fb61eff6c3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -30,6 +30,13 @@ - - #define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner) - -+static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) -+{ -+ if (!ctx->expansion_count) -+ return NULL; -+ return ctx->expansion_stack[ctx->expansion_count - 1].macro; -+} -+ - static void update_location(struct preproc_ctx *ctx); - - #define YY_USER_ACTION update_location(yyget_extra(yyscanner)); -@@ -125,7 +132,20 @@ INT_SUFFIX [uUlL]{0,2} - const char *p; - - if (!ctx->last_was_newline) -- return T_HASHSTRING; -+ { -+ struct preproc_macro *macro; -+ -+ /* Stringification is only done for function-like macro bodies. -+ * Anywhere else, we need to parse it as two separate tokens. -+ * We could use a state for this, but yyless() is easier and cheap. -+ */ -+ -+ if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count) -+ return T_HASHSTRING; -+ -+ yyless(1); -+ return T_TEXT; -+ } - - for (p = yytext + 1; strchr(" \t", *p); ++p) - ; -@@ -219,13 +239,6 @@ static bool preproc_is_writing(struct preproc_ctx *ctx) - return file->if_stack[file->if_count - 1].current_true; - } - --static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) --{ -- if (!ctx->expansion_count) -- return NULL; -- return ctx->expansion_stack[ctx->expansion_count - 1].macro; --} -- - /* Concatenation is not done for object-like macros, but is done for both - * function-like macro bodies and their arguments. 
*/ - static bool should_concat(struct preproc_ctx *ctx) -@@ -334,6 +347,43 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx, - return true; - } - -+static void preproc_stringify(struct preproc_ctx *ctx, struct vkd3d_string_buffer *buffer, const char *text) -+{ -+ const struct preproc_text *expansion; -+ const char *p = text + 1; -+ unsigned int i; -+ -+ while (*p == ' ' || *p == '\t') -+ ++p; -+ -+ vkd3d_string_buffer_printf(buffer, "\""); -+ if ((expansion = find_arg_expansion(ctx, p))) -+ { -+ size_t len = expansion->text.content_size; -+ size_t start = 0; -+ -+ while (len && strchr(" \t\r\n", expansion->text.buffer[len - 1])) -+ --len; -+ -+ while (start < len && strchr(" \t\r\n", expansion->text.buffer[start])) -+ ++start; -+ -+ for (i = start; i < len; ++i) -+ { -+ char c = expansion->text.buffer[i]; -+ -+ if (c == '\\' || c == '"') -+ vkd3d_string_buffer_printf(buffer, "\\"); -+ vkd3d_string_buffer_printf(buffer, "%c", c); -+ } -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(buffer, "%s", p); -+ } -+ vkd3d_string_buffer_printf(buffer, "\""); -+} -+ - int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - { - struct preproc_ctx *ctx = yyget_extra(scanner); -@@ -441,9 +491,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - switch (func_state->state) - { - case STATE_NONE: -- { -- struct preproc_macro *macro; -- - if (token == T_CONCAT && should_concat(ctx)) - { - while (ctx->buffer.content_size -@@ -452,37 +499,17 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - break; - } - -- /* Stringification, however, is only done for function-like -- * macro bodies. 
*/ -- if (token == T_HASHSTRING && (macro = preproc_get_top_macro(ctx)) && macro->arg_count) -+ if (token == T_HASHSTRING) - { -- const struct preproc_text *expansion; -- const char *p = text + 1; -- unsigned int i; -+ struct vkd3d_string_buffer buffer; - - if (ctx->current_directive) - return return_token(token, lval, text); - -- while (*p == ' ' || *p == '\t') -- ++p; -- -- vkd3d_string_buffer_printf(&ctx->buffer, "\""); -- if ((expansion = find_arg_expansion(ctx, p))) -- { -- for (i = 0; i < expansion->text.content_size; ++i) -- { -- char c = expansion->text.buffer[i]; -- -- if (c == '\\' || c == '"') -- vkd3d_string_buffer_printf(&ctx->buffer, "\\"); -- vkd3d_string_buffer_printf(&ctx->buffer, "%c", c); -- } -- } -- else -- { -- vkd3d_string_buffer_printf(&ctx->buffer, "%s", p); -- } -- vkd3d_string_buffer_printf(&ctx->buffer, "\""); -+ vkd3d_string_buffer_init(&buffer); -+ preproc_stringify(ctx, &buffer, text); -+ vkd3d_string_buffer_printf(&ctx->buffer, "%s", buffer.buffer); -+ vkd3d_string_buffer_cleanup(&buffer); - break; - } - -@@ -586,7 +613,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - else - vkd3d_string_buffer_printf(&ctx->buffer, "%s ", text); - break; -- } - - case STATE_IDENTIFIER: - if (token == '(') -@@ -628,6 +654,41 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - - switch (token) - { -+ /* Most text gets left alone (e.g. if it contains macros, -+ * the macros should be evaluated later). -+ * Arguments are a special case, and are replaced with -+ * their values immediately. */ -+ case T_IDENTIFIER: -+ case T_IDENTIFIER_PAREN: -+ { -+ const struct preproc_text *expansion; -+ -+ if ((expansion = find_arg_expansion(ctx, text))) -+ { -+ preproc_push_expansion(ctx, expansion, NULL); -+ continue; -+ } -+ -+ if (current_arg) -+ preproc_text_add(current_arg, text); -+ break; -+ } -+ -+ /* Stringification is another special case. Unsurprisingly, -+ * we need to stringify if this is an argument. 
More -+ * surprisingly, we need to stringify even if it's not. */ -+ case T_HASHSTRING: -+ { -+ struct vkd3d_string_buffer buffer; -+ -+ vkd3d_string_buffer_init(&buffer); -+ preproc_stringify(ctx, &buffer, text); -+ if (current_arg) -+ preproc_text_add(current_arg, buffer.buffer); -+ vkd3d_string_buffer_cleanup(&buffer); -+ break; -+ } -+ - case T_NEWLINE: - if (current_arg) - preproc_text_add(current_arg, " "); -@@ -686,6 +747,9 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - if (current_arg) - preproc_text_add(current_arg, text); - } -+ -+ if (current_arg) -+ preproc_text_add(current_arg, " "); - break; - } - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 9725a5c7e25..d71f0a698d9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -199,6 +199,21 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d - } - } - -+static inline bool register_is_undef(const struct vkd3d_shader_register *reg) -+{ -+ return reg->type == VKD3DSPR_UNDEF; -+} -+ -+static inline bool register_is_constant(const struct vkd3d_shader_register *reg) -+{ -+ return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); -+} -+ -+static inline bool register_is_constant_or_undef(const struct vkd3d_shader_register *reg) -+{ -+ return register_is_constant(reg) || register_is_undef(reg); -+} -+ - #define VKD3D_SPIRV_VERSION 0x00010000 - #define VKD3D_SPIRV_GENERATOR_ID 18 - #define VKD3D_SPIRV_GENERATOR_VERSION 8 -@@ -1746,6 +1761,38 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, - } - } - -+static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder *builder, -+ enum vkd3d_data_type data_type, unsigned int component_count) -+{ -+ uint32_t scalar_id; -+ -+ if (component_count == 1) -+ { -+ switch (data_type) -+ { -+ case VKD3D_DATA_FLOAT: -+ case VKD3D_DATA_SNORM: -+ case VKD3D_DATA_UNORM: -+ 
return vkd3d_spirv_get_op_type_float(builder, 32); -+ break; -+ case VKD3D_DATA_INT: -+ case VKD3D_DATA_UINT: -+ return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); -+ break; -+ case VKD3D_DATA_DOUBLE: -+ return vkd3d_spirv_get_op_type_float(builder, 64); -+ default: -+ FIXME("Unhandled data type %#x.\n", data_type); -+ return 0; -+ } -+ } -+ else -+ { -+ scalar_id = vkd3d_spirv_get_type_id_for_data_type(builder, data_type, 1); -+ return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); -+ } -+} -+ - static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) - { - vkd3d_spirv_stream_init(&builder->debug_stream); -@@ -2429,13 +2476,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve - - compiler->shader_type = shader_version->type; - -- compiler->input_signature = shader_desc->input_signature; -- compiler->output_signature = shader_desc->output_signature; -- compiler->patch_constant_signature = shader_desc->patch_constant_signature; -- memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); -- memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); -- memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); -- - if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) - { - compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); -@@ -2536,13 +2576,13 @@ static bool spirv_compiler_check_shader_visibility(const struct spirv_compiler * - } - - static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_constant_buffer( -- const struct spirv_compiler *compiler, const struct vkd3d_shader_constant_buffer *cb) -+ const struct spirv_compiler *compiler, const struct vkd3d_shader_register_range *range) - { -- unsigned int register_space = cb->range.space; -- unsigned int reg_idx = cb->range.first; -+ 
unsigned int register_space = range->space; -+ unsigned int reg_idx = range->first; - unsigned int i; - -- if (cb->range.first != cb->range.last) -+ if (range->first != range->last) - return NULL; - - for (i = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) -@@ -3211,7 +3251,7 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil - struct vkd3d_symbol reg_symbol, *symbol; - struct rb_entry *entry; - -- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); -+ assert(!register_is_constant_or_undef(reg)); - - if (reg->type == VKD3DSPR_TEMP) - { -@@ -3553,6 +3593,19 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi - vkd3d_component_type_from_data_type(reg->data_type), component_count, values); - } - -+static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_register *reg, DWORD write_mask) -+{ -+ unsigned int component_count = vkd3d_write_mask_component_count(write_mask); -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ uint32_t type_id; -+ -+ assert(reg->type == VKD3DSPR_UNDEF); -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); -+ return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); -+} -+ - static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask, - const struct vkd3d_shader_register_info *reg_info) -@@ -3563,7 +3616,7 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type; - unsigned int skipped_component_mask; - -- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); -+ assert(!register_is_constant_or_undef(reg)); - assert(vkd3d_write_mask_component_count(write_mask) == 1); - - component_idx = 
vkd3d_write_mask_get_component_idx(write_mask); -@@ -3615,6 +3668,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); - else if (reg->type == VKD3DSPR_IMMCONST64) - return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); -+ else if (reg->type == VKD3DSPR_UNDEF) -+ return spirv_compiler_emit_load_undef(compiler, reg, write_mask); - - component_count = vkd3d_write_mask_component_count(write_mask); - component_type = vkd3d_component_type_from_data_type(reg->data_type); -@@ -3827,7 +3882,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, - unsigned int src_write_mask = write_mask; - uint32_t type_id; - -- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); -+ assert(!register_is_constant_or_undef(reg)); - - if (!spirv_compiler_get_register_info(compiler, reg, ®_info)) - return; -@@ -5477,28 +5532,24 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * - return var_id; - } - --static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_constant_buffer(struct spirv_compiler *compiler, unsigned int size, -+ const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_register *reg) - { -- const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; - const SpvStorageClass storage_class = SpvStorageClassUniform; -- const struct vkd3d_shader_register *reg = &cb->src.reg; - struct vkd3d_push_constant_buffer_binding *push_cb; - struct vkd3d_descriptor_variable_info var_info; - struct vkd3d_symbol reg_symbol; - -- assert(!(instruction->flags & ~VKD3DSI_INDEXED_DYNAMIC)); -- -- if ((push_cb = 
spirv_compiler_find_push_constant_buffer(compiler, cb))) -+ if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, range))) - { - /* Push constant buffers are handled in - * spirv_compiler_emit_push_constant_buffers(). - */ -- unsigned int cb_size_in_bytes = cb->size * VKD3D_VEC4_SIZE * sizeof(uint32_t); -+ unsigned int cb_size_in_bytes = size * VKD3D_VEC4_SIZE * sizeof(uint32_t); - push_cb->reg = *reg; -- push_cb->size = cb->size; -+ push_cb->size = size; - if (cb_size_in_bytes > push_cb->pc.size) - { - WARN("Constant buffer size %u exceeds push constant size %u.\n", -@@ -5508,17 +5559,17 @@ static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compi - } - - vec4_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); -- length_id = spirv_compiler_get_constant_uint(compiler, cb->size); -+ length_id = spirv_compiler_get_constant_uint(compiler, size); - array_type_id = vkd3d_spirv_build_op_type_array(builder, vec4_id, length_id); - vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 16); - - struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1); - vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); - vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0); -- vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", cb->size); -+ vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); - - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, -- reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); -+ reg, range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); - - vkd3d_symbol_make_register(®_symbol, reg); - vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, -@@ -5528,6 +5579,16 @@ static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compi - spirv_compiler_put_symbol(compiler, ®_symbol); - } - -+static void 
spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; -+ -+ assert(!(instruction->flags & ~VKD3DSI_INDEXED_DYNAMIC)); -+ -+ spirv_compiler_emit_constant_buffer(compiler, cb->size, &cb->range, &cb->src.reg); -+} -+ - static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { -@@ -6641,7 +6702,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, - uint32_t components[VKD3D_VEC4_SIZE]; - unsigned int i, component_count; - -- if (src->reg.type == VKD3DSPR_IMMCONST || src->reg.type == VKD3DSPR_IMMCONST64 || dst->modifiers || src->modifiers) -+ if (register_is_constant_or_undef(&src->reg) || dst->modifiers || src->modifiers) - goto general_implementation; - - spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); -@@ -9436,6 +9497,26 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - return ret; - } - -+static void spirv_compiler_emit_sm1_constant_buffer(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_desc *desc, enum vkd3d_shader_d3dbc_constant_register set, -+ enum vkd3d_data_type data_type) -+{ -+ struct vkd3d_shader_register_range range = {.space = 0, .first = set, .last = set}; -+ uint32_t count = desc->flat_constant_count[set].external; -+ struct vkd3d_shader_register reg = -+ { -+ .type = VKD3DSPR_CONSTBUFFER, -+ .idx[0].offset = set, /* register ID */ -+ .idx[1].offset = set, /* register index */ -+ .idx[2].offset = count, /* size */ -+ .idx_count = 3, -+ .data_type = data_type, -+ }; -+ -+ if (count) -+ spirv_compiler_emit_constant_buffer(compiler, count, &range, ®); -+} -+ - static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser 
*parser, - struct vkd3d_shader_code *spirv) -@@ -9443,6 +9524,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; - const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; - struct vkd3d_shader_instruction_array instructions; - enum vkd3d_result result = VKD3D_OK; - unsigned int i; -@@ -9450,24 +9532,28 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - if (parser->shader_desc.temp_count) - spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); - -+ spirv_compiler_emit_sm1_constant_buffer(compiler, &parser->shader_desc, -+ VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, VKD3D_DATA_FLOAT); -+ spirv_compiler_emit_sm1_constant_buffer(compiler, &parser->shader_desc, -+ VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, VKD3D_DATA_INT); -+ spirv_compiler_emit_sm1_constant_buffer(compiler, &parser->shader_desc, -+ VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, VKD3D_DATA_UINT); -+ - compiler->location.column = 0; - compiler->location.line = 1; - -+ if ((result = vkd3d_shader_normalise(parser)) < 0) -+ return result; -+ - instructions = parser->instructions; - memset(&parser->instructions, 0, sizeof(parser->instructions)); - -- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL -- && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) -- { -- result = instruction_array_normalise_hull_shader_control_point_io(&instructions, -- &compiler->input_signature); -- } -- if (result >= 0) -- result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, -- &compiler->input_signature, &compiler->output_signature, &compiler->patch_constant_signature); -- -- if (result >= 0 && TRACE_ON()) -- vkd3d_shader_trace(&instructions, &parser->shader_version); -+ 
compiler->input_signature = shader_desc->input_signature; -+ compiler->output_signature = shader_desc->output_signature; -+ compiler->patch_constant_signature = shader_desc->patch_constant_signature; -+ memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); -+ memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); -+ memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); - - if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_shader_signature_outputs(compiler); -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 801c688a297..351943e2e53 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -2627,7 +2627,8 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant - return true; - } - --static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) -+static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, -+ uint32_t tag, struct vkd3d_bytecode_buffer *buffer) - { - /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN - * sections to be aligned. Without this, the sections themselves will be -@@ -2635,6 +2636,9 @@ static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_byt - size_t size = bytecode_align(buffer); - - dxbc_writer_add_section(dxbc, tag, buffer->data, size); -+ -+ if (buffer->status < 0) -+ ctx->result = buffer->status; - } - - static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) -@@ -2742,7 +2746,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - - set_u32(&buffer, count_position, i); - -- add_section(dxbc, output ? TAG_OSGN : TAG_ISGN, &buffer); -+ add_section(ctx, dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); - } - - static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) -@@ -2830,6 +2834,22 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) - return D3D_SVT_VERTEXSHADER; - case HLSL_TYPE_VOID: - return D3D_SVT_VOID; -+ case HLSL_TYPE_UAV: -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return D3D_SVT_RWTEXTURE1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return D3D_SVT_RWTEXTURE2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return D3D_SVT_RWTEXTURE3D; -+ case HLSL_SAMPLER_DIM_1DARRAY: -+ return D3D_SVT_RWTEXTURE1DARRAY; -+ case HLSL_SAMPLER_DIM_2DARRAY: -+ return D3D_SVT_RWTEXTURE2DARRAY; -+ default: -+ vkd3d_unreachable(); -+ } - default: - vkd3d_unreachable(); - } -@@ -3341,7 +3361,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(&buffer, creator_position, creator_offset); - -- add_section(dxbc, TAG_RDEF, &buffer); -+ add_section(ctx, dxbc, TAG_RDEF, &buffer); - - sm4_free_extern_resources(extern_resources, extern_resources_count); - } -@@ -4283,6 +4303,53 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - write_sm4_instruction(buffer, &instr); - } - -+static void write_sm4_sampleinfo(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_ir_node *dst = &load->node; -+ struct sm4_instruction instr; -+ -+ assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -+ if (dst->data_type->base_type == HLSL_TYPE_UINT) -+ instr.opcode |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_deref(ctx, 
&instr.srcs[0], resource, instr.dsts[0].writemask); -+ instr.src_count = 1; -+ -+ write_sm4_instruction(buffer, &instr); -+} -+ -+static void write_sm4_resinfo(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_ir_node *dst = &load->node; -+ struct sm4_instruction instr; -+ -+ assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_RESINFO; -+ if (dst->data_type->base_type == HLSL_TYPE_UINT) -+ instr.opcode |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); -+ instr.src_count = 2; -+ -+ write_sm4_instruction(buffer, &instr); -+} -+ - static bool type_is_float(const struct hlsl_type *type) - { - return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; -@@ -5101,6 +5168,14 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, - write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, - HLSL_SWIZZLE(W, W, W, W), texel_offset); - break; -+ -+ case HLSL_RESOURCE_SAMPLE_INFO: -+ write_sm4_sampleinfo(ctx, buffer, load); -+ break; -+ -+ case HLSL_RESOURCE_RESINFO: -+ write_sm4_resinfo(ctx, buffer, load); -+ break; - } - } - -@@ -5306,7 +5381,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - -- add_section(dxbc, TAG_SHDR, &buffer); -+ add_section(ctx, dxbc, TAG_SHDR, &buffer); - - sm4_free_extern_resources(extern_resources, extern_resources_count); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c 
b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 33d8c60e59a..d59cd704ceb 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -1152,6 +1152,15 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - } - } - -+ for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) -+ { -+ struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; -+ -+ if (parser->shader_desc.flat_constant_count[i].external) -+ vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, -+ &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); -+ } -+ - if (!ret && signature_info) - { - if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 0e93f3a556a..d35f49a63a2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -507,6 +507,7 @@ enum vkd3d_shader_register_type - VKD3DSPR_DEPTHOUTLE, - VKD3DSPR_RASTERIZER, - VKD3DSPR_OUTSTENCILREF, -+ VKD3DSPR_UNDEF, - - VKD3DSPR_INVALID = ~0u, - }; -@@ -840,6 +841,11 @@ struct vkd3d_shader_desc - struct shader_signature patch_constant_signature; - - uint32_t temp_count; -+ -+ struct -+ { -+ uint32_t used, external; -+ } flat_constant_count[3]; - }; - - struct vkd3d_shader_register_semantic -@@ -971,6 +977,8 @@ struct vkd3d_shader_instruction - } declaration; - }; - -+void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx); -+ - static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) - { - return ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w; -@@ -1398,11 +1406,6 @@ void dxbc_writer_add_section(struct 
dxbc_writer *dxbc, uint32_t tag, const void - void dxbc_writer_init(struct dxbc_writer *dxbc); - int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); - --enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *instructions); --enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( -- struct vkd3d_shader_instruction_array *instructions, const struct shader_signature *input_signature); --enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, -- enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, -- struct shader_signature *output_signature, struct shader_signature *patch_constant_signature); -+enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser); - - #endif /* __VKD3D_SHADER_PRIVATE_H */ -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 53cb5d9582c..8b5f7899cf3 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -454,9 +454,9 @@ static const struct d3d12_root_parameter *root_signature_get_root_descriptor( - } - - /* ID3D12Fence */ --static struct d3d12_fence *impl_from_ID3D12Fence(ID3D12Fence *iface) -+static struct d3d12_fence *impl_from_ID3D12Fence1(ID3D12Fence1 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence1_iface); - } - - static VkResult d3d12_fence_create_vk_fence(struct d3d12_fence *fence, VkFence *vk_fence) -@@ -900,18 +900,19 @@ static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uin - vkd3d_mutex_unlock(&fence->mutex); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence1 *iface, - REFIID riid, void **object) - { - TRACE("iface %p, riid %s, object 
%p.\n", iface, debugstr_guid(riid), object); - -- if (IsEqualGUID(riid, &IID_ID3D12Fence) -+ if (IsEqualGUID(riid, &IID_ID3D12Fence1) -+ || IsEqualGUID(riid, &IID_ID3D12Fence) - || IsEqualGUID(riid, &IID_ID3D12Pageable) - || IsEqualGUID(riid, &IID_ID3D12DeviceChild) - || IsEqualGUID(riid, &IID_ID3D12Object) - || IsEqualGUID(riid, &IID_IUnknown)) - { -- ID3D12Fence_AddRef(iface); -+ ID3D12Fence1_AddRef(iface); - *object = iface; - return S_OK; - } -@@ -922,9 +923,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence *iface) -+static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence1 *iface) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - ULONG refcount = InterlockedIncrement(&fence->refcount); - - TRACE("%p increasing refcount to %u.\n", fence, refcount); -@@ -937,9 +938,9 @@ static void d3d12_fence_incref(struct d3d12_fence *fence) - InterlockedIncrement(&fence->internal_refcount); - } - --static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence *iface) -+static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence1 *iface) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - ULONG refcount = InterlockedDecrement(&fence->refcount); - - TRACE("%p decreasing refcount to %u.\n", fence, refcount); -@@ -972,10 +973,10 @@ static void d3d12_fence_decref(struct d3d12_fence *fence) - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence1 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", - 
iface, debugstr_guid(guid), data_size, data); -@@ -983,10 +984,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, - return vkd3d_get_private_data(&fence->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence1 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -994,37 +995,37 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, - return vkd3d_set_private_data(&fence->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence1 *iface, - REFGUID guid, const IUnknown *data) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&fence->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence1 *iface, const WCHAR *name) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, fence->device->wchar_size)); - - return name ? 
S_OK : E_INVALIDARG; - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence *iface, REFIID iid, void **device) -+static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence1 *iface, REFIID iid, void **device) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); - - return d3d12_device_query_interface(fence->device, iid, device); - } - --static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface) -+static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence1 *iface) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - uint64_t completed_value; - - TRACE("iface %p.\n", iface); -@@ -1035,10 +1036,10 @@ static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface - return completed_value; - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence1 *iface, - UINT64 value, HANDLE event) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - unsigned int i; - bool latch = false; - -@@ -1106,9 +1107,9 @@ static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fen - return d3d12_device_flush_blocked_queues(fence->device); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value) -+static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence1 *iface, UINT64 value) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, value %#"PRIx64".\n", iface, value); - -@@ -1117,7 +1118,16 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_fence_Signal(ID3D12Fence *iface, UINT64 v - return d3d12_fence_signal(fence, value, VK_NULL_HANDLE, true); - } - --static const struct ID3D12FenceVtbl d3d12_fence_vtbl = -+static D3D12_FENCE_FLAGS STDMETHODCALLTYPE d3d12_fence_GetCreationFlags(ID3D12Fence1 *iface) -+{ -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); -+ -+ TRACE("iface %p.\n", iface); -+ -+ return fence->flags; -+} -+ -+static const struct ID3D12Fence1Vtbl d3d12_fence_vtbl = - { - /* IUnknown methods */ - d3d12_fence_QueryInterface, -@@ -1134,14 +1144,18 @@ static const struct ID3D12FenceVtbl d3d12_fence_vtbl = - d3d12_fence_GetCompletedValue, - d3d12_fence_SetEventOnCompletion, - d3d12_fence_Signal, -+ /* ID3D12Fence1 methods */ -+ d3d12_fence_GetCreationFlags, - }; - - static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) - { -- if (!iface) -+ ID3D12Fence1 *iface1; -+ -+ if (!(iface1 = (ID3D12Fence1 *)iface)) - return NULL; -- assert(iface->lpVtbl == &d3d12_fence_vtbl); -- return impl_from_ID3D12Fence(iface); -+ assert(iface1->lpVtbl == &d3d12_fence_vtbl); -+ return impl_from_ID3D12Fence1(iface1); - } - - static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device, -@@ -1151,7 +1165,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * - VkResult vr; - HRESULT hr; - -- fence->ID3D12Fence_iface.lpVtbl = &d3d12_fence_vtbl; -+ fence->ID3D12Fence1_iface.lpVtbl = &d3d12_fence_vtbl; - fence->internal_refcount = 1; - fence->refcount = 1; - -@@ -1162,7 +1176,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * - - vkd3d_cond_init(&fence->null_event_cond); - -- if (flags) -+ if ((fence->flags = flags)) - FIXME("Ignoring flags %#x.\n", flags); - - fence->events = NULL; -@@ -1316,32 +1330,26 @@ static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_comm - return hr; - } - -- allocator->current_command_list = list; -- -- return S_OK; --} -- --static void 
d3d12_command_allocator_free_command_buffer(struct d3d12_command_allocator *allocator, -- struct d3d12_command_list *list) --{ -- struct d3d12_device *device = allocator->device; -- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -- -- TRACE("allocator %p, list %p.\n", allocator, list); -- -- if (allocator->current_command_list == list) -- allocator->current_command_list = NULL; -- - if (!vkd3d_array_reserve((void **)&allocator->command_buffers, &allocator->command_buffers_size, - allocator->command_buffer_count + 1, sizeof(*allocator->command_buffers))) - { - WARN("Failed to add command buffer.\n"); - VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool, - 1, &list->vk_command_buffer)); -- return; -+ return E_OUTOFMEMORY; - } -- - allocator->command_buffers[allocator->command_buffer_count++] = list->vk_command_buffer; -+ -+ allocator->current_command_list = list; -+ -+ return S_OK; -+} -+ -+static void d3d12_command_allocator_remove_command_list(struct d3d12_command_allocator *allocator, -+ const struct d3d12_command_list *list) -+{ -+ if (allocator->current_command_list == list) -+ allocator->current_command_list = NULL; - } - - static bool d3d12_command_allocator_add_render_pass(struct d3d12_command_allocator *allocator, VkRenderPass pass) -@@ -1911,10 +1919,32 @@ HRESULT d3d12_command_allocator_create(struct d3d12_device *device, - return S_OK; - } - -+static void d3d12_command_signature_incref(struct d3d12_command_signature *signature) -+{ -+ vkd3d_atomic_increment(&signature->internal_refcount); -+} -+ -+static void d3d12_command_signature_decref(struct d3d12_command_signature *signature) -+{ -+ unsigned int refcount = vkd3d_atomic_decrement(&signature->internal_refcount); -+ -+ if (!refcount) -+ { -+ struct d3d12_device *device = signature->device; -+ -+ vkd3d_private_store_destroy(&signature->private_store); -+ -+ vkd3d_free((void *)signature->desc.pArgumentDescs); -+ vkd3d_free(signature); -+ -+ 
d3d12_device_release(device); -+ } -+} -+ - /* ID3D12CommandList */ --static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList2(ID3D12GraphicsCommandList2 *iface) -+static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList3(ID3D12GraphicsCommandList3 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); - } - - static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) -@@ -2260,12 +2290,13 @@ static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *l - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList3 *iface, - REFIID iid, void **object) - { - TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); - -- if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) -+ if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3) -+ || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) - || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList1) - || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList) - || IsEqualGUID(iid, &IID_ID3D12CommandList) -@@ -2273,7 +2304,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic - || IsEqualGUID(iid, &IID_ID3D12Object) - || IsEqualGUID(iid, &IID_IUnknown)) - { -- ID3D12GraphicsCommandList2_AddRef(iface); -+ ID3D12GraphicsCommandList3_AddRef(iface); - *object = iface; - return S_OK; - } -@@ -2284,9 +2315,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList2 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList3 *iface) - 
{ -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - ULONG refcount = InterlockedIncrement(&list->refcount); - - TRACE("%p increasing refcount to %u.\n", list, refcount); -@@ -2299,9 +2330,9 @@ static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bind - vkd3d_free(bindings->vk_uav_counter_views); - } - --static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList2 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList3 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - ULONG refcount = InterlockedDecrement(&list->refcount); - - TRACE("%p decreasing refcount to %u.\n", list, refcount); -@@ -2314,7 +2345,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL - - /* When command pool is destroyed, all command buffers are implicitly freed. 
*/ - if (list->allocator) -- d3d12_command_allocator_free_command_buffer(list->allocator, list); -+ d3d12_command_allocator_remove_command_list(list->allocator, list); - - vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE]); - vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS]); -@@ -2327,66 +2358,66 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL - return refcount; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList3 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); - - return vkd3d_get_private_data(&list->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList3 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); - - return vkd3d_set_private_data(&list->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList3 *iface, - REFGUID guid, const IUnknown *data) - { -- struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&list->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList2 *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList3 *iface, const WCHAR *name) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size)); - - return name ? S_OK : E_INVALIDARG; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList2 *iface, REFIID iid, void **device) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList3 *iface, REFIID iid, void **device) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); - - return d3d12_device_query_interface(list->device, iid, device); - } - --static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList2 *iface) -+static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList3 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p.\n", iface); - - return list->type; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList2 *iface) -+static HRESULT 
STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList3 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - VkResult vr; - -@@ -2412,7 +2443,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL - - if (list->allocator) - { -- d3d12_command_allocator_free_command_buffer(list->allocator, list); -+ d3d12_command_allocator_remove_command_list(list->allocator, list); - list->allocator = NULL; - } - -@@ -2430,7 +2461,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL - static void d3d12_command_list_reset_state(struct d3d12_command_list *list, - ID3D12PipelineState *initial_pipeline_state) - { -- ID3D12GraphicsCommandList2 *iface = &list->ID3D12GraphicsCommandList2_iface; -+ ID3D12GraphicsCommandList3 *iface = &list->ID3D12GraphicsCommandList3_iface; - - memset(list->strides, 0, sizeof(list->strides)); - list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; -@@ -2466,14 +2497,14 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, - - list->descriptor_heap_count = 0; - -- ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); -+ ID3D12GraphicsCommandList3_SetPipelineState(iface, initial_pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList3 *iface, - ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) - { - struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - HRESULT hr; - - 
TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", -@@ -2500,7 +2531,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandL - return hr; - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList3 *iface, - ID3D12PipelineState *pipeline_state) - { - FIXME("iface %p, pipline_state %p stub!\n", iface, pipeline_state); -@@ -3186,6 +3217,20 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) - } - } - -+static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) -+{ -+ if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) -+ { -+ if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) -+ { -+ /* Descriptors can be written after binding. */ -+ FIXME("Flushing descriptor updates while list %p is not closed.\n", list); -+ command_list_flush_vk_heap_updates(list); -+ } -+ list->descriptor_heaps[list->descriptor_heap_count++] = heap; -+ } -+} -+ - static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, - enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) - { -@@ -3210,18 +3255,6 @@ static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l - bindings->sampler_heap_id = heap->serial_id; - } - -- if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) -- { -- if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) -- { -- /* Descriptors can be written after binding. 
*/ -- FIXME("Flushing descriptor updates while list %p is not closed.\n", list); -- command_list_flush_vk_heap_updates(list); -- list->descriptor_heap_count = 0; -- } -- list->descriptor_heaps[list->descriptor_heap_count++] = heap; -- } -- - vkd3d_mutex_lock(&heap->vk_sets_mutex); - - for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) -@@ -3354,11 +3387,11 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList3 *iface, - UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, - UINT start_instance_location) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " -@@ -3378,11 +3411,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCom - instance_count, start_vertex_location, start_instance_location)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList3 *iface, - UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, - INT base_vertex_location, UINT start_instance_location) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " -@@ -3404,10 +3437,10 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_DrawIndexedInstanced(ID3D12Grap - instance_count, start_vertex_location, base_vertex_location, start_instance_location)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList3 *iface, - UINT x, UINT y, UINT z) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); -@@ -3423,10 +3456,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandL - VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_vk_device_procs *vk_procs; - VkBufferCopy buffer_copy; -@@ -3708,11 +3741,11 @@ static bool validate_d3d12_box(const D3D12_BOX *box) - && box->back > box->front; - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList3 *iface, - const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, - const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list 
*list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_format *src_format, *dst_format; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3833,10 +3866,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst, ID3D12Resource *src) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_format *dst_format, *src_format; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3903,7 +3936,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, - const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, - D3D12_TILE_COPY_FLAGS flags) -@@ -3914,11 +3947,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommand - buffer, buffer_offset, flags); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst, UINT dst_sub_resource_idx, - ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_format *src_format, *dst_format, *vk_format; - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3981,10 +4014,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList3 *iface, - D3D12_PRIMITIVE_TOPOLOGY topology) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, topology %#x.\n", iface, topology); - -@@ -3995,11 +4028,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12Gr - d3d12_command_list_invalidate_current_pipeline(list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList3 *iface, - UINT viewport_count, const D3D12_VIEWPORT *viewports) - { - VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; - -@@ -4033,10 +4066,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo - VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE 
d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList3 *iface, - UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; -@@ -4061,10 +4094,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12Graphic - VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList3 *iface, - const FLOAT blend_factor[4]) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); -@@ -4073,10 +4106,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12Graphics - VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList3 *iface, - UINT stencil_ref) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); -@@ -4085,11 +4118,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsC - VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, 
VK_STENCIL_FRONT_AND_BACK, stencil_ref)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList3 *iface, - ID3D12PipelineState *pipeline_state) - { - struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); - -@@ -4140,10 +4173,10 @@ static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BA - return 0; - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList3 *iface, - UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - bool have_aliasing_barriers = false, have_split_barriers = false; - const struct vkd3d_vk_device_procs *vk_procs; - const struct vkd3d_vulkan_info *vk_info; -@@ -4366,13 +4399,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC - WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList3 *iface, - ID3D12GraphicsCommandList *command_list) - { - FIXME("iface %p, command_list %p stub!\n", iface, command_list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE 
d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList3 *iface, - UINT heap_count, ID3D12DescriptorHeap *const *heaps) - { - TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); -@@ -4398,10 +4431,10 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis - d3d12_command_list_invalidate_root_parameters(list, bind_point); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList3 *iface, - ID3D12RootSignature *root_signature) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_signature %p.\n", iface, root_signature); - -@@ -4409,10 +4442,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12G - unsafe_impl_from_ID3D12RootSignature(root_signature)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList3 *iface, - ID3D12RootSignature *root_signature) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_signature %p.\n", iface, root_signature); - -@@ -4425,6 +4458,7 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l - { - struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; - const struct d3d12_root_signature *root_signature = bindings->root_signature; -+ struct d3d12_descriptor_heap *descriptor_heap; - struct d3d12_desc *desc; - - assert(root_signature_get_descriptor_table(root_signature, index)); -@@ -4435,15 +4469,25 @@ 
static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l - if (bindings->descriptor_tables[index] == desc) - return; - -+ descriptor_heap = d3d12_desc_get_descriptor_heap(desc); -+ if (!(descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) -+ { -+ /* GetGPUDescriptorHandleForHeapStart() returns a null handle in this case, -+ * but a CPU handle could be passed. */ -+ WARN("Descriptor heap %p is not shader visible.\n", descriptor_heap); -+ return; -+ } -+ command_list_add_descriptor_heap(list, descriptor_heap); -+ - bindings->descriptor_tables[index] = desc; - bindings->descriptor_table_dirty_mask |= (uint64_t)1 << index; - bindings->descriptor_table_active_mask |= (uint64_t)1 << index; - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", - iface, root_parameter_index, base_descriptor.ptr); -@@ -4452,10 +4496,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(I - root_parameter_index, base_descriptor); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface 
%p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", - iface, root_parameter_index, base_descriptor.ptr); -@@ -4477,10 +4521,10 @@ static void d3d12_command_list_set_root_constants(struct d3d12_command_list *lis - c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", - iface, root_parameter_index, data, dst_offset); -@@ -4489,10 +4533,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3 - root_parameter_index, dst_offset, 1, &data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", - iface, root_parameter_index, data, dst_offset); -@@ -4501,10 +4545,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID - root_parameter_index, dst_offset, 1, &data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE 
d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", - iface, root_parameter_index, constant_count, data, dst_offset); -@@ -4513,10 +4557,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID - root_parameter_index, dst_offset, constant_count, data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", - iface, root_parameter_index, constant_count, data, dst_offset); -@@ -4578,9 +4622,9 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4589,9 +4633,9 
@@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferVie - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4650,9 +4694,9 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4662,9 +4706,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceVie - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, 
address); -@@ -4674,9 +4718,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceVi - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4686,9 +4730,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessVi - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4697,10 +4741,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessV - root_parameter_index, address); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList3 *iface, - const D3D12_INDEX_BUFFER_VIEW *view) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - struct d3d12_resource 
*resource; - enum VkIndexType index_type; -@@ -4740,10 +4784,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics - view->BufferLocation - resource->gpu_address, index_type)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList3 *iface, - UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_null_resources *null_resources; - struct vkd3d_gpu_va_allocator *gpu_va_allocator; - VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; -@@ -4798,10 +4842,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - d3d12_command_list_invalidate_current_pipeline(list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList3 *iface, - UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; - VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; - VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; -@@ -4863,11 +4907,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsComm - VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE 
d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList3 *iface, - UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, - BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct d3d12_rtv_desc *rtv_desc; - const struct d3d12_dsv_desc *dsv_desc; - VkFormat prev_dsv_format; -@@ -5068,12 +5112,12 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList3 *iface, - D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, - UINT rect_count, const D3D12_RECT *rects) - { - const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); - struct VkAttachmentDescription attachment_desc; - struct VkAttachmentReference ds_reference; -@@ -5117,10 +5161,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12Gra - &clear_value, rect_count, rects); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList3 *iface, - D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list 
*list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct d3d12_rtv_desc *rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); - struct VkAttachmentDescription attachment_desc; - struct VkAttachmentReference color_reference; -@@ -5365,11 +5409,11 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList3 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const UINT values[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct vkd3d_view *descriptor, *uint_view = NULL; - struct d3d12_device *device = list->device; - struct vkd3d_texture_view_desc view_desc; -@@ -5431,11 +5475,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - vkd3d_view_decref(uint_view, device); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList3 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const float values[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *resource_impl; - VkClearColorValue colour; - struct vkd3d_view *view; -@@ -5451,16 +5495,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I - 
d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); - } - --static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) - { - FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList3 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - const struct vkd3d_vk_device_procs *vk_procs; - VkQueryControlFlags flags = 0; -@@ -5487,10 +5531,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsComman - VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList3 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - const struct vkd3d_vk_device_procs *vk_procs; - -@@ -5532,12 +5576,12 @@ static size_t get_query_stride(D3D12_QUERY_TYPE type) - return sizeof(uint64_t); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList2 
*iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList3 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, - ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) - { - const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i, first, count; -@@ -5613,10 +5657,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12Graphics - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); - const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -5685,19 +5729,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCo - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList3 *iface, - UINT metadata, const void *data, UINT size) - { - FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList2 *iface, -+static 
void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList3 *iface, - UINT metadata, const void *data, UINT size) - { - FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList2 *iface) -+static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList3 *iface) - { - FIXME("iface %p stub!\n", iface); - } -@@ -5706,14 +5750,14 @@ STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMEN - STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); - STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS)); - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList3 *iface, - ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, - UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) - { - struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); - struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); - struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; -@@ -5731,6 +5775,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC - return; - } - -+ d3d12_command_signature_incref(sig_impl); -+ - signature_desc = &sig_impl->desc; - for (i = 0; i < signature_desc->NumArgumentDescs; ++i) - { -@@ -5793,6 +5839,7 @@ static void 
STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC - if (!d3d12_command_list_update_compute_state(list)) - { - WARN("Failed to update compute state, ignoring dispatch.\n"); -+ d3d12_command_signature_decref(sig_impl); - return; - } - -@@ -5805,9 +5852,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC - break; - } - } -+ -+ d3d12_command_signature_decref(sig_impl); - } - --static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst_buffer, UINT64 dst_offset, - ID3D12Resource *src_buffer, UINT64 src_offset, - UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, -@@ -5820,7 +5869,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12Grap - dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); - } - --static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst_buffer, UINT64 dst_offset, - ID3D12Resource *src_buffer, UINT64 src_offset, - UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, -@@ -5833,20 +5882,20 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr - dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList3 *iface, - FLOAT min, FLOAT max) - { - FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList2 
*iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList3 *iface, - UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) - { - FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", - iface, sample_count, pixel_count, sample_positions); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, - ID3D12Resource *src_resource, UINT src_sub_resource_idx, - D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) -@@ -5858,16 +5907,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12 - src_resource, src_sub_resource_idx, src_rect, format, mode); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList2 *iface, UINT mask) -+static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList3 *iface, UINT mask) - { - FIXME("iface %p, mask %#x stub!\n", iface, mask); - } - --static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList3 *iface, - UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, - const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *resource; - unsigned int i; - -@@ -5880,7 +5929,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12Grap - } - } - --static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = 
-+static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList3 *iface, -+ ID3D12ProtectedResourceSession *protected_session) -+{ -+ FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); -+} -+ -+static const struct ID3D12GraphicsCommandList3Vtbl d3d12_command_list_vtbl = - { - /* IUnknown methods */ - d3d12_command_list_QueryInterface, -@@ -5956,6 +6011,8 @@ static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = - d3d12_command_list_SetViewInstanceMask, - /* ID3D12GraphicsCommandList2 methods */ - d3d12_command_list_WriteBufferImmediate, -+ /* ID3D12GraphicsCommandList3 methods */ -+ d3d12_command_list_SetProtectedResourceSession, - }; - - static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) -@@ -5963,7 +6020,7 @@ static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12Comma - if (!iface) - return NULL; - assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); -- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); - } - - static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, -@@ -5972,7 +6029,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d - { - HRESULT hr; - -- list->ID3D12GraphicsCommandList2_iface.lpVtbl = &d3d12_command_list_vtbl; -+ list->ID3D12GraphicsCommandList3_iface.lpVtbl = &d3d12_command_list_vtbl; - list->refcount = 1; - - list->type = type; -@@ -7299,16 +7356,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_signature_Release(ID3D12CommandSign - TRACE("%p decreasing refcount to %u.\n", signature, refcount); - - if (!refcount) -- { -- struct d3d12_device *device = signature->device; -- -- vkd3d_private_store_destroy(&signature->private_store); -- -- 
vkd3d_free((void *)signature->desc.pArgumentDescs); -- vkd3d_free(signature); -- -- d3d12_device_release(device); -- } -+ d3d12_command_signature_decref(signature); - - return refcount; - } -@@ -7415,6 +7463,7 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, const D3D12_ - - object->ID3D12CommandSignature_iface.lpVtbl = &d3d12_command_signature_vtbl; - object->refcount = 1; -+ object->internal_refcount = 1; - - object->desc = *desc; - if (!(object->desc.pArgumentDescs = vkd3d_calloc(desc->NumArgumentDescs, sizeof(*desc->pArgumentDescs)))) -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index b9a8943cc08..a2e1f13dec3 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -2657,8 +2657,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *if - initial_pipeline_state, &object))) - return hr; - -- return return_interface(&object->ID3D12GraphicsCommandList2_iface, -- &IID_ID3D12GraphicsCommandList2, riid, command_list); -+ return return_interface(&object->ID3D12GraphicsCommandList3_iface, -+ &IID_ID3D12GraphicsCommandList3, riid, command_list); - } - - /* Direct3D feature levels restrict which formats can be optionally supported. */ -@@ -3414,6 +3414,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, - struct d3d12_device *device = impl_from_ID3D12Device(iface); - unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; - unsigned int dst_range_size, src_range_size; -+ struct d3d12_descriptor_heap *dst_heap; - const struct d3d12_desc *src; - struct d3d12_desc *dst; - -@@ -3443,13 +3444,14 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, - src_range_size = src_descriptor_range_sizes ? 
src_descriptor_range_sizes[src_range_idx] : 1; - - dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); -+ dst_heap = d3d12_desc_get_descriptor_heap(dst); - src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); - - for (; dst_idx < dst_range_size && src_idx < src_range_size; ++dst_idx, ++src_idx) - { - if (dst[dst_idx].s.u.object == src[src_idx].s.u.object) - continue; -- d3d12_desc_copy(&dst[dst_idx], &src[src_idx], device); -+ d3d12_desc_copy(&dst[dst_idx], &src[src_idx], dst_heap, device); - } - - if (dst_idx >= dst_range_size) -@@ -3747,7 +3749,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, - if (FAILED(hr = d3d12_fence_create(device, initial_value, flags, &object))) - return hr; - -- return return_interface(&object->ID3D12Fence_iface, &IID_ID3D12Fence, riid, fence); -+ return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); - } - - static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device *iface) -@@ -3891,12 +3893,18 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device *iface - UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, - D3D12_SUBRESOURCE_TILING *sub_resource_tilings) - { -- FIXME("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " -+ const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); -+ struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ -+ TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " - "standard_title_shape %p, sub_resource_tiling_count %p, " -- "first_sub_resource_tiling %u, sub_resource_tilings %p stub!\n", -+ "first_sub_resource_tiling %u, sub_resource_tilings %p.\n", - iface, resource, total_tile_count, packed_mip_info, standard_tile_shape, - sub_resource_tiling_count, first_sub_resource_tiling, - sub_resource_tilings); -+ -+ d3d12_resource_get_tiling(device, resource_impl, 
total_tile_count, packed_mip_info, standard_tile_shape, -+ sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); - } - - static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, LUID *luid) -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 4c07d326504..cd3856c2937 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -971,6 +971,11 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - return hr; - } - -+static void d3d12_resource_tile_info_cleanup(struct d3d12_resource *resource) -+{ -+ vkd3d_free(resource->tiles.subresources); -+} -+ - static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12_device *device) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -@@ -986,6 +991,8 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 - else - VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL)); - -+ d3d12_resource_tile_info_cleanup(resource); -+ - if (resource->heap) - d3d12_heap_resource_destroyed(resource->heap); - } -@@ -1057,9 +1064,193 @@ static void d3d12_resource_get_level_box(const struct d3d12_resource *resource, - box->back = d3d12_resource_desc_get_depth(&resource->desc, level); - } - --static void d3d12_resource_init_tiles(struct d3d12_resource *resource) -+static void compute_image_subresource_size_in_tiles(const VkExtent3D *tile_extent, -+ const struct D3D12_RESOURCE_DESC *desc, unsigned int miplevel_idx, -+ struct vkd3d_tiled_region_extent *size) -+{ -+ unsigned int width, height, depth; -+ -+ width = d3d12_resource_desc_get_width(desc, miplevel_idx); -+ height = d3d12_resource_desc_get_height(desc, miplevel_idx); -+ depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); -+ size->width = (width + tile_extent->width - 1) / tile_extent->width; -+ size->height = (height + tile_extent->height - 1) / tile_extent->height; -+ 
size->depth = (depth + tile_extent->depth - 1) / tile_extent->depth; -+} -+ -+void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, -+ UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, -+ UINT *subresource_tiling_count, UINT first_subresource_tiling, -+ D3D12_SUBRESOURCE_TILING *subresource_tilings) - { -- resource->tiles.subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); -+ unsigned int i, subresource, subresource_count, miplevel_idx, count; -+ const struct vkd3d_subresource_tile_info *tile_info; -+ const VkExtent3D *tile_extent; -+ -+ tile_extent = &resource->tiles.tile_extent; -+ -+ if (packed_mip_info) -+ { -+ packed_mip_info->NumStandardMips = resource->tiles.standard_mip_count; -+ packed_mip_info->NumPackedMips = resource->desc.MipLevels - packed_mip_info->NumStandardMips; -+ packed_mip_info->NumTilesForPackedMips = !!resource->tiles.packed_mip_tile_count; /* non-zero dummy value */ -+ packed_mip_info->StartTileIndexInOverallResource = packed_mip_info->NumPackedMips -+ ? resource->tiles.subresources[resource->tiles.standard_mip_count].offset : 0; -+ } -+ -+ if (standard_tile_shape) -+ { -+ /* D3D12 docs say tile shape is cleared to zero if there is no standard mip, but drivers don't to do this. 
*/ -+ standard_tile_shape->WidthInTexels = tile_extent->width; -+ standard_tile_shape->HeightInTexels = tile_extent->height; -+ standard_tile_shape->DepthInTexels = tile_extent->depth; -+ } -+ -+ if (total_tile_count) -+ *total_tile_count = resource->tiles.total_count; -+ -+ if (!subresource_tiling_count) -+ return; -+ -+ subresource_count = resource->tiles.subresource_count; -+ -+ count = subresource_count - min(first_subresource_tiling, subresource_count); -+ count = min(count, *subresource_tiling_count); -+ -+ for (i = 0; i < count; ++i) -+ { -+ subresource = i + first_subresource_tiling; -+ miplevel_idx = subresource % resource->desc.MipLevels; -+ if (miplevel_idx >= resource->tiles.standard_mip_count) -+ { -+ memset(&subresource_tilings[i], 0, sizeof(subresource_tilings[i])); -+ subresource_tilings[i].StartTileIndexInOverallResource = D3D12_PACKED_TILE; -+ continue; -+ } -+ -+ tile_info = &resource->tiles.subresources[subresource]; -+ subresource_tilings[i].StartTileIndexInOverallResource = tile_info->offset; -+ subresource_tilings[i].WidthInTiles = tile_info->extent.width; -+ subresource_tilings[i].HeightInTiles = tile_info->extent.height; -+ subresource_tilings[i].DepthInTiles = tile_info->extent.depth; -+ } -+ *subresource_tiling_count = i; -+} -+ -+static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3d12_device *device) -+{ -+ unsigned int i, start_idx, subresource_count, tile_count, miplevel_idx; -+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -+ VkSparseImageMemoryRequirements *sparse_requirements_array; -+ VkSparseImageMemoryRequirements sparse_requirements = {0}; -+ struct vkd3d_subresource_tile_info *tile_info; -+ VkMemoryRequirements requirements; -+ const VkExtent3D *tile_extent; -+ uint32_t requirement_count; -+ -+ subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); -+ -+ if (!(resource->tiles.subresources = vkd3d_calloc(subresource_count, 
sizeof(*resource->tiles.subresources)))) -+ { -+ ERR("Failed to allocate subresource info array.\n"); -+ return false; -+ } -+ -+ if (d3d12_resource_is_buffer(resource)) -+ { -+ assert(subresource_count == 1); -+ -+ VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, resource->u.vk_buffer, &requirements)); -+ if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) -+ FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); -+ -+ tile_info = &resource->tiles.subresources[0]; -+ tile_info->offset = 0; -+ tile_info->extent.width = align(resource->desc.Width, D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) -+ / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; -+ tile_info->extent.height = 1; -+ tile_info->extent.depth = 1; -+ tile_info->count = tile_info->extent.width; -+ -+ resource->tiles.tile_extent.width = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; -+ resource->tiles.tile_extent.height = 1; -+ resource->tiles.tile_extent.depth = 1; -+ resource->tiles.total_count = tile_info->extent.width; -+ resource->tiles.subresource_count = 1; -+ resource->tiles.standard_mip_count = 1; -+ resource->tiles.packed_mip_tile_count = 0; -+ } -+ else -+ { -+ VK_CALL(vkGetImageMemoryRequirements(device->vk_device, resource->u.vk_image, &requirements)); -+ if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) -+ FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); -+ -+ requirement_count = 0; -+ VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, &requirement_count, NULL)); -+ if (!(sparse_requirements_array = vkd3d_calloc(requirement_count, sizeof(*sparse_requirements_array)))) -+ { -+ ERR("Failed to allocate sparse requirements array.\n"); -+ return false; -+ } -+ VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, -+ &requirement_count, sparse_requirements_array)); -+ -+ for (i = 0; i < requirement_count; ++i) -+ { -+ if 
(sparse_requirements_array[i].formatProperties.aspectMask & resource->format->vk_aspect_mask) -+ { -+ if (sparse_requirements.formatProperties.aspectMask) -+ { -+ WARN("Ignoring properties for aspect mask %#x.\n", -+ sparse_requirements_array[i].formatProperties.aspectMask); -+ } -+ else -+ { -+ sparse_requirements = sparse_requirements_array[i]; -+ } -+ } -+ } -+ vkd3d_free(sparse_requirements_array); -+ if (!sparse_requirements.formatProperties.aspectMask) -+ { -+ WARN("Failed to get sparse requirements.\n"); -+ return false; -+ } -+ -+ resource->tiles.tile_extent = sparse_requirements.formatProperties.imageGranularity; -+ resource->tiles.subresource_count = subresource_count; -+ resource->tiles.standard_mip_count = sparse_requirements.imageMipTailSize -+ ? sparse_requirements.imageMipTailFirstLod : resource->desc.MipLevels; -+ resource->tiles.packed_mip_tile_count = (resource->tiles.standard_mip_count < resource->desc.MipLevels) -+ ? sparse_requirements.imageMipTailSize / requirements.alignment : 0; -+ -+ for (i = 0, start_idx = 0; i < subresource_count; ++i) -+ { -+ miplevel_idx = i % resource->desc.MipLevels; -+ -+ tile_extent = &sparse_requirements.formatProperties.imageGranularity; -+ tile_info = &resource->tiles.subresources[i]; -+ compute_image_subresource_size_in_tiles(tile_extent, &resource->desc, miplevel_idx, &tile_info->extent); -+ tile_info->offset = start_idx; -+ tile_info->count = 0; -+ -+ if (miplevel_idx < resource->tiles.standard_mip_count) -+ { -+ tile_count = tile_info->extent.width * tile_info->extent.height * tile_info->extent.depth; -+ start_idx += tile_count; -+ tile_info->count = tile_count; -+ } -+ else if (miplevel_idx == resource->tiles.standard_mip_count) -+ { -+ tile_info->count = 1; /* Non-zero dummy value */ -+ start_idx += 1; -+ } -+ } -+ resource->tiles.total_count = start_idx; -+ } -+ -+ return true; - } - - /* ID3D12Resource */ -@@ -2013,7 +2204,11 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, - desc, 
initial_state, optimized_clear_value, &object))) - return hr; - -- d3d12_resource_init_tiles(object); -+ if (!d3d12_resource_init_tiles(object, device)) -+ { -+ d3d12_resource_Release(&object->ID3D12Resource_iface); -+ return E_OUTOFMEMORY; -+ } - - TRACE("Created reserved resource %p.\n", object); - -@@ -2411,13 +2606,11 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr - descriptor_writes_free_object_refs(&writes, device); - } - --static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) -+static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst, struct d3d12_descriptor_heap *descriptor_heap) - { -- struct d3d12_descriptor_heap *descriptor_heap; - unsigned int i, head; - - i = dst->index; -- descriptor_heap = d3d12_desc_get_descriptor_heap(dst); - head = descriptor_heap->dirty_list_head; - - /* Only one thread can swap the value away from zero. */ -@@ -2431,14 +2624,20 @@ static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) - } - } - --void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, -- struct d3d12_device *device) -+static inline void descriptor_heap_write_atomic(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_desc *dst, -+ const struct d3d12_desc *src, struct d3d12_device *device) - { - void *object = src->s.u.object; - - d3d12_desc_replace(dst, object, device); -- if (device->use_vk_heaps && object && !dst->next) -- d3d12_desc_mark_as_modified(dst); -+ if (descriptor_heap->use_vk_heaps && object && !dst->next) -+ d3d12_desc_mark_as_modified(dst, descriptor_heap); -+} -+ -+void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, -+ struct d3d12_device *device) -+{ -+ descriptor_heap_write_atomic(d3d12_desc_get_descriptor_heap(dst), dst, src, device); - } - - static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) -@@ -2446,7 +2645,9 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, 
struct d3d12_devic - d3d12_desc_replace(descriptor, NULL, device); - } - --void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, -+/* This is a major performance bottleneck for some games, so do not load the device -+ * pointer from dst_heap. In some cases device will not be used. */ -+void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, - struct d3d12_device *device) - { - struct d3d12_desc tmp; -@@ -2454,7 +2655,7 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, - assert(dst != src); - - tmp.s.u.object = d3d12_desc_get_object_ref(src, device); -- d3d12_desc_write_atomic(dst, &tmp, device); -+ descriptor_heap_write_atomic(dst_heap, dst, &tmp, device); - } - - static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device, -@@ -3853,7 +4054,15 @@ static D3D12_GPU_DESCRIPTOR_HANDLE * STDMETHODCALLTYPE d3d12_descriptor_heap_Get - - TRACE("iface %p, descriptor %p.\n", iface, descriptor); - -- descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; -+ if (heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) -+ { -+ descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; -+ } -+ else -+ { -+ WARN("Heap %p is not shader-visible.\n", iface); -+ descriptor->ptr = 0; -+ } - - return descriptor; - } -@@ -3956,7 +4165,7 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri - descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; - memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); - -- if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV -+ if (!descriptor_heap->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV - && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) - return S_OK; - -@@ -3987,6 +4196,7 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript - if (FAILED(hr = 
vkd3d_private_store_init(&descriptor_heap->private_store))) - return hr; - -+ descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); - d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); - vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index c5259420acf..4bd6812b16e 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -253,6 +253,11 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) - { - } - -+static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) -+{ -+ return InterlockedIncrement((LONG volatile *)x); -+} -+ - static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) - { - return InterlockedDecrement((LONG volatile *)x); -@@ -387,6 +392,15 @@ static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) - } - # else - # error "vkd3d_atomic_decrement() not implemented for this platform" -+# endif /* HAVE_SYNC_SUB_AND_FETCH */ -+ -+# if HAVE_SYNC_ADD_AND_FETCH -+static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) -+{ -+ return __sync_add_and_fetch(x, 1); -+} -+# else -+# error "vkd3d_atomic_increment() not implemented for this platform" - # endif /* HAVE_SYNC_ADD_AND_FETCH */ - - # if HAVE_SYNC_BOOL_COMPARE_AND_SWAP -@@ -602,10 +616,12 @@ struct vkd3d_signaled_semaphore - /* ID3D12Fence */ - struct d3d12_fence - { -- ID3D12Fence ID3D12Fence_iface; -+ ID3D12Fence1 ID3D12Fence1_iface; - LONG internal_refcount; - LONG refcount; - -+ D3D12_FENCE_FLAGS flags; -+ - uint64_t value; - uint64_t max_pending_value; - struct vkd3d_mutex mutex; -@@ -673,9 +689,28 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); - #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 - #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010 - -+struct 
vkd3d_tiled_region_extent -+{ -+ unsigned int width; -+ unsigned int height; -+ unsigned int depth; -+}; -+ -+struct vkd3d_subresource_tile_info -+{ -+ unsigned int offset; -+ unsigned int count; -+ struct vkd3d_tiled_region_extent extent; -+}; -+ - struct d3d12_resource_tile_info - { -+ VkExtent3D tile_extent; -+ unsigned int total_count; -+ unsigned int standard_mip_count; -+ unsigned int packed_mip_tile_count; - unsigned int subresource_count; -+ struct vkd3d_subresource_tile_info *subresources; - }; - - /* ID3D12Resource */ -@@ -728,6 +763,10 @@ static inline bool d3d12_resource_is_texture(const struct d3d12_resource *resour - - bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); - HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d12_device *device); -+void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, -+ UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, -+ UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, -+ D3D12_SUBRESOURCE_TILING *sub_resource_tilings); - - HRESULT d3d12_committed_resource_create(struct d3d12_device *device, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, -@@ -868,8 +907,9 @@ static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc * - { - do - { -- view = src->s.u.object; -- } while (view && !vkd3d_view_incref(view)); -+ if (!(view = src->s.u.object)) -+ return NULL; -+ } while (!vkd3d_view_incref(view)); - - /* Check if the object is still in src to handle the case where it was - * already freed and reused elsewhere when the refcount was incremented. 
*/ -@@ -895,7 +935,10 @@ static inline void d3d12_desc_copy_raw(struct d3d12_desc *dst, const struct d3d1 - dst->s = src->s; - } - --void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); -+struct d3d12_descriptor_heap; -+ -+void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, -+ struct d3d12_device *device); - void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, - struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc); - void d3d12_desc_create_srv(struct d3d12_desc *descriptor, -@@ -998,6 +1041,7 @@ struct d3d12_descriptor_heap - D3D12_DESCRIPTOR_HEAP_DESC desc; - - struct d3d12_device *device; -+ bool use_vk_heaps; - - struct vkd3d_private_store private_store; - -@@ -1382,7 +1426,7 @@ enum vkd3d_pipeline_bind_point - /* ID3D12CommandList */ - struct d3d12_command_list - { -- ID3D12GraphicsCommandList2 ID3D12GraphicsCommandList2_iface; -+ ID3D12GraphicsCommandList3 ID3D12GraphicsCommandList3_iface; - LONG refcount; - - D3D12_COMMAND_LIST_TYPE type; -@@ -1575,6 +1619,7 @@ struct d3d12_command_signature - { - ID3D12CommandSignature ID3D12CommandSignature_iface; - LONG refcount; -+ unsigned int internal_refcount; - - D3D12_COMMAND_SIGNATURE_DESC desc; - --- -2.40.1 - diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-269747dbf3ee32bf23e6d1ab388d2a058ca.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-269747dbf3ee32bf23e6d1ab388d2a058ca.patch new file mode 100644 index 00000000..a9572ac3 --- /dev/null +++ b/patches/vkd3d-latest/0004-Updated-vkd3d-to-269747dbf3ee32bf23e6d1ab388d2a058ca.patch @@ -0,0 +1,947 @@ +From 247ab6630dc34db194033b3721d30246c8fb8012 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Tue, 29 Aug 2023 07:21:37 +1000 +Subject: [PATCH] Updated vkd3d to 269747dbf3ee32bf23e6d1ab388d2a058ca90f9f. 
+ +--- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 1 + + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 47 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 5 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 296 +++++++++--------- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 144 +++++++-- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 37 +++ + libs/vkd3d/libs/vkd3d/command.c | 5 +- + 7 files changed, 347 insertions(+), 188 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 0a8d3a692a3..070fec74326 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -356,6 +356,7 @@ struct hlsl_attribute + #define HLSL_MODIFIER_COLUMN_MAJOR 0x00000400 + #define HLSL_STORAGE_IN 0x00000800 + #define HLSL_STORAGE_OUT 0x00001000 ++#define HLSL_MODIFIER_INLINE 0x00002000 + + #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ + HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 29e0ff0c5be..43ea4b4d038 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -493,11 +493,11 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + || !strcmp(attr->name, "fastopt") + || !strcmp(attr->name, "allow_uav_condition")) + { +- hlsl_fixme(ctx, loc, "Unhandled attribute %s.", attr->name); ++ hlsl_fixme(ctx, loc, "Unhandled attribute '%s'.", attr->name); + } + else + { +- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unrecognized attribute %s.", attr->name); ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); + } + } + +@@ -5129,6 +5129,9 @@ func_prototype_no_attrs: + struct hlsl_ir_var *var; + struct hlsl_type *type; + ++ /* Functions are unconditionally inlined. 
*/ ++ modifiers &= ~HLSL_MODIFIER_INLINE; ++ + if (modifiers & ~HLSL_MODIFIERS_MAJORITY_MASK) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Only majority modifiers are allowed on functions."); +@@ -5970,6 +5973,10 @@ var_modifiers: + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, &@1); + } ++ | KW_INLINE var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); ++ } + + + complex_initializer: +@@ -6108,19 +6115,39 @@ jump_statement: + } + + selection_statement: +- KW_IF '(' expr ')' if_body ++ attribute_list_optional KW_IF '(' expr ')' if_body + { +- struct hlsl_ir_node *condition = node_from_block($3); ++ struct hlsl_ir_node *condition = node_from_block($4); ++ const struct parse_attribute_list *attributes = &$1; + struct hlsl_ir_node *instr; ++ unsigned int i; ++ ++ if (attribute_list_has_duplicates(attributes)) ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); ++ ++ for (i = 0; i < attributes->count; ++i) ++ { ++ const struct hlsl_attribute *attr = attributes->attrs[i]; ++ ++ if (!strcmp(attr->name, "branch") ++ || !strcmp(attr->name, "flatten")) ++ { ++ hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, "Unhandled attribute '%s'.", attr->name); ++ } ++ else ++ { ++ hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); ++ } ++ } + +- if (!(instr = hlsl_new_if(ctx, condition, $5.then_block, $5.else_block, &@1))) ++ if (!(instr = hlsl_new_if(ctx, condition, $6.then_block, $6.else_block, &@2))) + { +- destroy_block($5.then_block); +- destroy_block($5.else_block); ++ destroy_block($6.then_block); ++ destroy_block($6.else_block); + YYABORT; + } +- destroy_block($5.then_block); +- destroy_block($5.else_block); ++ destroy_block($6.then_block); ++ destroy_block($6.else_block); + if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) + { + struct vkd3d_string_buffer *string; 
+@@ -6130,7 +6157,7 @@ selection_statement: + "if condition type %s is not scalar.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } +- $$ = $3; ++ $$ = $4; + hlsl_block_add_instr($$, instr); + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index bfa605f4ba7..bae8e5f9a5f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -321,9 +321,10 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + if (!semantic->name) + return; + +- vector_type_src = hlsl_get_vector_type(ctx, type->base_type, +- (ctx->profile->major_version < 4) ? 4 : hlsl_type_minor_size(type)); + vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); ++ vector_type_src = vector_type_dst; ++ if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) ++ vector_type_src = hlsl_get_vector_type(ctx, type->base_type, 4); + + for (i = 0; i < hlsl_type_major_size(type); ++i) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index fa605f185ae..9b3084538ba 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -204,11 +204,6 @@ static inline bool register_is_undef(const struct vkd3d_shader_register *reg) + return reg->type == VKD3DSPR_UNDEF; + } + +-static inline bool register_is_constant(const struct vkd3d_shader_register *reg) +-{ +- return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); +-} +- + static inline bool register_is_constant_or_undef(const struct vkd3d_shader_register *reg) + { + return register_is_constant(reg) || register_is_undef(reg); +@@ -2599,8 +2594,8 @@ static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_const + return NULL; + } + +-static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *compiler, +- const 
struct vkd3d_shader_resource *resource, const struct vkd3d_shader_sampler *sampler) ++static bool spirv_compiler_has_combined_sampler_for_resource(const struct spirv_compiler *compiler, ++ const struct vkd3d_shader_register_range *range) + { + const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; + const struct vkd3d_shader_combined_resource_sampler *combined_sampler; +@@ -2609,10 +2604,35 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com + if (!shader_interface->combined_sampler_count) + return false; + +- if (resource && (resource->reg.reg.type == VKD3DSPR_UAV || resource->range.last != resource->range.first)) ++ if (range->last != range->first) ++ return false; ++ ++ for (i = 0; i < shader_interface->combined_sampler_count; ++i) ++ { ++ combined_sampler = &shader_interface->combined_samplers[i]; ++ ++ if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) ++ continue; ++ ++ if ((combined_sampler->resource_space == range->space ++ && combined_sampler->resource_index == range->first)) ++ return true; ++ } ++ ++ return false; ++} ++ ++static bool spirv_compiler_has_combined_sampler_for_sampler(const struct spirv_compiler *compiler, ++ const struct vkd3d_shader_register_range *range) ++{ ++ const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; ++ const struct vkd3d_shader_combined_resource_sampler *combined_sampler; ++ unsigned int i; ++ ++ if (!shader_interface->combined_sampler_count) + return false; + +- if (sampler && sampler->range.first != sampler->range.last) ++ if (range->last != range->first) + return false; + + for (i = 0; i < shader_interface->combined_sampler_count; ++i) +@@ -2622,10 +2642,8 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com + if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) + continue; + +- if ((!resource || 
(combined_sampler->resource_space == resource->range.space +- && combined_sampler->resource_index == resource->range.first)) +- && (!sampler || (combined_sampler->sampler_space == sampler->range.space +- && combined_sampler->sampler_index == sampler->range.first))) ++ if (combined_sampler->sampler_space == range->space ++ && combined_sampler->sampler_index == range->first) + return true; + } + +@@ -2643,6 +2661,16 @@ static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_error(struct spirv_compiler * + compiler->failed = true; + } + ++static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_warning(struct spirv_compiler *compiler, ++ enum vkd3d_shader_error error, const char *format, ...) ++{ ++ va_list args; ++ ++ va_start(args, format); ++ vkd3d_shader_vwarning(compiler->message_context, &compiler->location, error, format, args); ++ va_end(args); ++} ++ + static struct vkd3d_string_buffer *vkd3d_shader_register_range_string(struct spirv_compiler *compiler, + const struct vkd3d_shader_register_range *range) + { +@@ -5538,8 +5566,8 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * + return var_id; + } + +-static void spirv_compiler_emit_constant_buffer(struct spirv_compiler *compiler, unsigned int size, +- const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_register *reg) ++static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_register_range *range, unsigned int register_id, unsigned int size) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; +@@ -5548,13 +5576,20 @@ static void spirv_compiler_emit_constant_buffer(struct spirv_compiler *compiler, + struct vkd3d_descriptor_variable_info var_info; + struct vkd3d_symbol reg_symbol; + ++ struct vkd3d_shader_register reg = ++ { ++ .type = VKD3DSPR_CONSTBUFFER, ++ .idx[0].offset = register_id, ++ .idx_count = 1, ++ }; ++ + if ((push_cb 
= spirv_compiler_find_push_constant_buffer(compiler, range))) + { + /* Push constant buffers are handled in + * spirv_compiler_emit_push_constant_buffers(). + */ + unsigned int cb_size_in_bytes = size * VKD3D_VEC4_SIZE * sizeof(uint32_t); +- push_cb->reg = *reg; ++ push_cb->reg = reg; + push_cb->size = size; + if (cb_size_in_bytes > push_cb->pc.size) + { +@@ -5575,9 +5610,9 @@ static void spirv_compiler_emit_constant_buffer(struct spirv_compiler *compiler, + vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); + + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, +- reg, range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); ++ ®, range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); + +- vkd3d_symbol_make_register(®_symbol, reg); ++ vkd3d_symbol_make_register(®_symbol, ®); + vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, + VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + reg_symbol.descriptor_array = var_info.array_symbol; +@@ -5585,16 +5620,6 @@ static void spirv_compiler_emit_constant_buffer(struct spirv_compiler *compiler, + spirv_compiler_put_symbol(compiler, ®_symbol); + } + +-static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; +- +- assert(!(instruction->flags & ~VKD3DSI_INDEXED_DYNAMIC)); +- +- spirv_compiler_emit_constant_buffer(compiler, cb->size, &cb->range, &cb->src.reg); +-} +- + static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { +@@ -5628,29 +5653,34 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi + spirv_compiler_put_symbol(compiler, ®_symbol); + } + +-static void spirv_compiler_emit_dcl_sampler(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction 
*instruction) ++static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_register_range *range, unsigned int register_id) + { +- const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; + const SpvStorageClass storage_class = SpvStorageClassUniformConstant; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_register *reg = &sampler->src.reg; + struct vkd3d_descriptor_variable_info var_info; + struct vkd3d_symbol reg_symbol; + uint32_t type_id, var_id; + +- vkd3d_symbol_make_sampler(®_symbol, reg); +- reg_symbol.info.sampler.range = sampler->range; ++ const struct vkd3d_shader_register reg = ++ { ++ .type = VKD3DSPR_SAMPLER, ++ .idx[0].offset = register_id, ++ .idx_count = 1, ++ }; ++ ++ vkd3d_symbol_make_sampler(®_symbol, ®); ++ reg_symbol.info.sampler.range = *range; + spirv_compiler_put_symbol(compiler, ®_symbol); + +- if (spirv_compiler_has_combined_sampler(compiler, NULL, sampler)) ++ if (spirv_compiler_has_combined_sampler_for_sampler(compiler, range)) + return; + + type_id = vkd3d_spirv_get_op_type_sampler(builder); +- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, +- &sampler->range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); ++ var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, ++ range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); + +- vkd3d_symbol_make_register(®_symbol, reg); ++ vkd3d_symbol_make_register(®_symbol, ®); + vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, + VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + reg_symbol.descriptor_array = var_info.array_symbol; +@@ -5832,20 +5862,30 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi + } + + static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, +- const struct vkd3d_shader_resource *resource, enum 
vkd3d_shader_resource_type resource_type, +- enum vkd3d_data_type resource_data_type, unsigned int structure_stride, bool raw) ++ const struct vkd3d_shader_register_range *range, unsigned int register_id, ++ unsigned int sample_count, bool is_uav, enum vkd3d_shader_resource_type resource_type, ++ enum vkd3d_shader_resource_data_type resource_data_type, unsigned int structure_stride, bool raw) + { + struct vkd3d_descriptor_variable_info var_info, counter_var_info = {0}; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + SpvStorageClass storage_class = SpvStorageClassUniformConstant; + uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; +- const struct vkd3d_shader_register *reg = &resource->reg.reg; + const struct vkd3d_spirv_resource_type *resource_type_info; + enum vkd3d_shader_component_type sampled_type; + struct vkd3d_symbol resource_symbol; +- bool is_uav; + +- is_uav = reg->type == VKD3DSPR_UAV; ++ struct vkd3d_shader_register reg = ++ { ++ .type = is_uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, ++ .idx[0].offset = register_id, ++ .idx_count = 1, ++ }; ++ ++ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; ++ else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && sample_count == 1) ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; ++ + if (!(resource_type_info = spirv_compiler_enable_resource_type(compiler, + resource_type, is_uav))) + { +@@ -5853,11 +5893,11 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + return; + } + +- sampled_type = vkd3d_component_type_from_data_type(resource_data_type); ++ sampled_type = vkd3d_component_type_from_resource_data_type(resource_data_type); + +- if (spirv_compiler_has_combined_sampler(compiler, resource, NULL)) ++ if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) + { +- spirv_compiler_emit_combined_sampler_declarations(compiler, reg, &resource->range, ++ spirv_compiler_emit_combined_sampler_declarations(compiler, ®, range, + resource_type, sampled_type, structure_stride, raw, resource_type_info); + return; + } +@@ -5880,19 +5920,18 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + } + else + { +- type_id = spirv_compiler_get_image_type_id(compiler, reg, &resource->range, ++ type_id = spirv_compiler_get_image_type_id(compiler, ®, range, + resource_type_info, sampled_type, structure_stride || raw, 0); + } + +- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, +- &resource->range, resource_type, false, &var_info); ++ var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, ++ range, resource_type, false, &var_info); + + if (is_uav) + { + const struct vkd3d_shader_descriptor_info1 *d; + +- d = spirv_compiler_get_descriptor_info(compiler, +- VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &resource->range); ++ d = 
spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); + + if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) + vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); +@@ -5924,15 +5963,15 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + type_id = struct_id; + } + +- counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, +- &resource->range, resource_type, true, &counter_var_info); ++ counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, ++ type_id, ®, range, resource_type, true, &counter_var_info); + } + } + +- vkd3d_symbol_make_resource(&resource_symbol, reg); ++ vkd3d_symbol_make_resource(&resource_symbol, ®); + resource_symbol.id = var_id; + resource_symbol.descriptor_array = var_info.array_symbol; +- resource_symbol.info.resource.range = resource->range; ++ resource_symbol.info.resource.range = *range; + resource_symbol.info.resource.sampled_type = sampled_type; + resource_symbol.info.resource.type_id = type_id; + resource_symbol.info.resource.resource_type_info = resource_type_info; +@@ -5945,58 +5984,6 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + spirv_compiler_put_symbol(compiler, &resource_symbol); + } + +-static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; +- enum vkd3d_shader_resource_type resource_type = semantic->resource_type; +- uint32_t flags = instruction->flags; +- +- /* We don't distinguish between APPEND and COUNTER UAVs. 
*/ +- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; +- if (flags) +- FIXME("Unhandled UAV flags %#x.\n", flags); +- +- if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && semantic->sample_count == 1) +- resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; +- else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && semantic->sample_count == 1) +- resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; +- +- spirv_compiler_emit_resource_declaration(compiler, &semantic->resource, +- resource_type, semantic->resource_data_type[0], 0, false); +-} +- +-static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- const struct vkd3d_shader_raw_resource *resource = &instruction->declaration.raw_resource; +- uint32_t flags = instruction->flags; +- +- /* We don't distinguish between APPEND and COUNTER UAVs. */ +- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; +- if (flags) +- FIXME("Unhandled UAV flags %#x.\n", flags); +- +- spirv_compiler_emit_resource_declaration(compiler, &resource->resource, +- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, 0, true); +-} +- +-static void spirv_compiler_emit_dcl_resource_structured(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- const struct vkd3d_shader_structured_resource *resource = &instruction->declaration.structured_resource; +- unsigned int stride = resource->byte_stride; +- uint32_t flags = instruction->flags; +- +- /* We don't distinguish between APPEND and COUNTER UAVs. 
*/ +- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; +- if (flags) +- FIXME("Unhandled UAV flags %#x.\n", flags); +- +- spirv_compiler_emit_resource_declaration(compiler, &resource->resource, +- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, stride / 4, false); +-} +- + static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) + { +@@ -7466,7 +7453,13 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c + assert(compiler->control_flow_depth); + assert(cf_info->current_block == VKD3D_BLOCK_SWITCH); + +- assert(src->swizzle == VKD3D_SHADER_NO_SWIZZLE && src->reg.type == VKD3DSPR_IMMCONST); ++ if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) ++ { ++ WARN("Unexpected src swizzle %#x.\n", src->swizzle); ++ spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE, ++ "The swizzle for a switch case value is not scalar."); ++ } ++ assert(src->reg.type == VKD3DSPR_IMMCONST); + value = *src->reg.u.immconst_uint; + + if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.case_blocks, &cf_info->u.switch_.case_blocks_size, +@@ -9174,27 +9167,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DCL_INDEXABLE_TEMP: + spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); + break; +- case VKD3DSIH_DCL_CONSTANT_BUFFER: +- spirv_compiler_emit_dcl_constant_buffer(compiler, instruction); +- break; + case VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER: + spirv_compiler_emit_dcl_immediate_constant_buffer(compiler, instruction); + break; +- case VKD3DSIH_DCL_SAMPLER: +- spirv_compiler_emit_dcl_sampler(compiler, instruction); +- break; +- case VKD3DSIH_DCL: +- case VKD3DSIH_DCL_UAV_TYPED: +- spirv_compiler_emit_dcl_resource(compiler, instruction); +- break; +- case VKD3DSIH_DCL_RESOURCE_RAW: +- case VKD3DSIH_DCL_UAV_RAW: +- spirv_compiler_emit_dcl_resource_raw(compiler, 
instruction); +- break; +- case VKD3DSIH_DCL_RESOURCE_STRUCTURED: +- case VKD3DSIH_DCL_UAV_STRUCTURED: +- spirv_compiler_emit_dcl_resource_structured(compiler, instruction); +- break; + case VKD3DSIH_DCL_TGSM_RAW: + spirv_compiler_emit_dcl_tgsm_raw(compiler, instruction); + break; +@@ -9490,8 +9465,16 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_CUT_STREAM: + spirv_compiler_emit_cut_stream(compiler, instruction); + break; ++ case VKD3DSIH_DCL: ++ case VKD3DSIH_DCL_CONSTANT_BUFFER: + case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: ++ case VKD3DSIH_DCL_RESOURCE_RAW: ++ case VKD3DSIH_DCL_RESOURCE_STRUCTURED: ++ case VKD3DSIH_DCL_SAMPLER: + case VKD3DSIH_DCL_TEMPS: ++ case VKD3DSIH_DCL_UAV_RAW: ++ case VKD3DSIH_DCL_UAV_STRUCTURED: ++ case VKD3DSIH_DCL_UAV_TYPED: + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_NOP: + /* nothing to do */ +@@ -9503,24 +9486,48 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + return ret; + } + +-static void spirv_compiler_emit_sm1_constant_buffer(struct spirv_compiler *compiler, +- const struct vkd3d_shader_desc *desc, enum vkd3d_shader_d3dbc_constant_register set, +- enum vkd3d_data_type data_type) ++static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) + { +- struct vkd3d_shader_register_range range = {.space = 0, .first = set, .last = set}; +- uint32_t count = desc->flat_constant_count[set].external; +- struct vkd3d_shader_register reg = ++ unsigned int i; ++ ++ for (i = 0; i < compiler->scan_descriptor_info->descriptor_count; ++i) + { +- .type = VKD3DSPR_CONSTBUFFER, +- .idx[0].offset = set, /* register ID */ +- .idx[1].offset = set, /* register index */ +- .idx[2].offset = count, /* size */ +- .idx_count = 3, +- .data_type = data_type, +- }; ++ const struct vkd3d_shader_descriptor_info1 *descriptor = &compiler->scan_descriptor_info->descriptors[i]; ++ struct vkd3d_shader_register_range range; ++ ++ range.first = 
descriptor->register_index; ++ if (descriptor->count == ~0u) ++ range.last = ~0u; ++ else ++ range.last = descriptor->register_index + descriptor->count - 1; ++ range.space = descriptor->register_space; ++ ++ switch (descriptor->type) ++ { ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: ++ spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor->register_id); ++ break; ++ ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: ++ spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor->register_id, descriptor->buffer_size); ++ break; + +- if (count) +- spirv_compiler_emit_constant_buffer(compiler, count, &range, ®); ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: ++ spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, ++ descriptor->sample_count, false, descriptor->resource_type, descriptor->resource_data_type, ++ descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); ++ break; ++ ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: ++ spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, ++ descriptor->sample_count, true, descriptor->resource_type, descriptor->resource_data_type, ++ descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ } + } + + static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +@@ -9538,12 +9545,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + if (parser->shader_desc.temp_count) + spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); + +- spirv_compiler_emit_sm1_constant_buffer(compiler, &parser->shader_desc, +- VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, VKD3D_DATA_FLOAT); +- spirv_compiler_emit_sm1_constant_buffer(compiler, &parser->shader_desc, +- VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, VKD3D_DATA_INT); +- spirv_compiler_emit_sm1_constant_buffer(compiler, 
&parser->shader_desc, +- VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, VKD3D_DATA_UINT); ++ spirv_compiler_emit_descriptor_declarations(compiler); + + compiler->location.column = 0; + compiler->location.line = 1; +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 550f9b27cc7..7949be150bf 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -507,7 +507,7 @@ enum vkd3d_sm4_input_primitive_type + + enum vkd3d_sm4_swizzle_type + { +- VKD3D_SM4_SWIZZLE_NONE = 0x0, ++ VKD3D_SM4_SWIZZLE_NONE = 0x0, /* swizzle bitfield contains a mask */ + VKD3D_SM4_SWIZZLE_VEC4 = 0x1, + VKD3D_SM4_SWIZZLE_SCALAR = 0x2, + }; +@@ -707,6 +707,19 @@ static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, + VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; + } + ++static void shader_sm4_read_case_condition(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, ++ (struct vkd3d_shader_src_param *)&ins->src[0]); ++ if (ins->src[0].reg.type != VKD3DSPR_IMMCONST) ++ { ++ FIXME("Switch case value is not a 32-bit constant.\n"); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE, ++ "Switch case value is not a 32-bit immediate constant register."); ++ } ++} ++ + static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) + { +@@ -1215,7 +1228,8 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] = + {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, + {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", + shader_sm4_read_conditional_op}, +- {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, ++ {VKD3D_SM4_OP_CASE, 
VKD3DSIH_CASE, "", "u", ++ shader_sm4_read_case_condition}, + {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, + {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", + shader_sm4_read_conditional_op}, +@@ -2012,6 +2026,7 @@ static bool shader_sm4_validate_input_output_register(struct vkd3d_shader_sm4_pa + static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) + { ++ unsigned int dimension, mask; + DWORD token; + + if (*ptr >= end) +@@ -2027,37 +2042,63 @@ static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, cons + return false; + } + +- if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) ++ switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) + { +- src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; +- } +- else +- { +- enum vkd3d_sm4_swizzle_type swizzle_type = +- (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; ++ case VKD3D_SM4_DIMENSION_NONE: ++ case VKD3D_SM4_DIMENSION_SCALAR: ++ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ break; + +- switch (swizzle_type) ++ case VKD3D_SM4_DIMENSION_VEC4: + { +- case VKD3D_SM4_SWIZZLE_NONE: +- if (shader_sm4_is_scalar_register(&src_param->reg)) +- src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); +- else ++ enum vkd3d_sm4_swizzle_type swizzle_type = ++ (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; ++ ++ switch (swizzle_type) ++ { ++ case VKD3D_SM4_SWIZZLE_NONE: + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; +- break; + +- case VKD3D_SM4_SWIZZLE_SCALAR: +- src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; +- src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; +- break; ++ mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; ++ /* Mask seems only to be used for vec4 
constants and is always zero. */ ++ if (!register_is_constant(&src_param->reg)) ++ { ++ FIXME("Source mask %#x is not for a constant.\n", mask); ++ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, ++ "Unhandled mask %#x for a non-constant source register.", mask); ++ } ++ else if (mask) ++ { ++ FIXME("Unhandled mask %#x.\n", mask); ++ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, ++ "Unhandled source register mask %#x.", mask); ++ } + +- case VKD3D_SM4_SWIZZLE_VEC4: +- src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); +- break; ++ break; + +- default: +- FIXME("Unhandled swizzle type %#x.\n", swizzle_type); +- break; ++ case VKD3D_SM4_SWIZZLE_SCALAR: ++ src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; ++ src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; ++ break; ++ ++ case VKD3D_SM4_SWIZZLE_VEC4: ++ src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); ++ break; ++ ++ default: ++ FIXME("Unhandled swizzle type %#x.\n", swizzle_type); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, ++ "Source register swizzle type %#x is invalid.", swizzle_type); ++ break; ++ } ++ break; + } ++ ++ default: ++ FIXME("Unhandled dimension %#x.\n", dimension); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, ++ "Source register dimension %#x is invalid.", dimension); ++ break; + } + + if (register_is_input_output(&src_param->reg) && !shader_sm4_validate_input_output_register(priv, +@@ -2070,7 +2111,9 @@ static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, cons + static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) + { ++ enum 
vkd3d_sm4_swizzle_type swizzle_type; + enum vkd3d_shader_src_modifier modifier; ++ unsigned int dimension, swizzle; + DWORD token; + + if (*ptr >= end) +@@ -2092,10 +2135,53 @@ static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, cons + return false; + } + +- dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; ++ switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) ++ { ++ case VKD3D_SM4_DIMENSION_NONE: ++ dst_param->write_mask = 0; ++ break; ++ ++ case VKD3D_SM4_DIMENSION_SCALAR: ++ dst_param->write_mask = VKD3DSP_WRITEMASK_0; ++ break; ++ ++ case VKD3D_SM4_DIMENSION_VEC4: ++ swizzle_type = (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; ++ switch (swizzle_type) ++ { ++ case VKD3D_SM4_SWIZZLE_NONE: ++ dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; ++ break; ++ ++ case VKD3D_SM4_SWIZZLE_VEC4: ++ swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); ++ if (swizzle != VKD3D_SHADER_NO_SWIZZLE) ++ { ++ FIXME("Unhandled swizzle %#x.\n", swizzle); ++ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE, ++ "Unhandled destination register swizzle %#x.", swizzle); ++ } ++ dst_param->write_mask = VKD3DSP_WRITEMASK_ALL; ++ break; ++ ++ default: ++ FIXME("Unhandled swizzle type %#x.\n", swizzle_type); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, ++ "Destination register swizzle type %#x is invalid.", swizzle_type); ++ break; ++ } ++ break; ++ ++ default: ++ FIXME("Unhandled dimension %#x.\n", dimension); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, ++ "Destination register dimension %#x is invalid.", dimension); ++ break; ++ } ++ + if (data_type == VKD3D_DATA_DOUBLE) + dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); +- /* Scalar 
registers are declared with no write mask in shader bytecode. */ ++ /* Some scalar registers are declared with no write mask in shader bytecode. */ + if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) + dst_param->write_mask = VKD3DSP_WRITEMASK_0; + dst_param->modifiers = 0; +@@ -3715,8 +3801,10 @@ static void sm4_src_from_constant_value(struct sm4_src_register *src, + src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; + for (i = 0; i < 4; ++i) + { +- if (map_writemask & (1u << i)) ++ if ((map_writemask & (1u << i)) && (j < width)) + src->reg.immconst_uint[i] = value->u[j++].u; ++ else ++ src->reg.immconst_uint[i] = 0; + } + } + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index bf925a44690..84614a4eb79 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -78,9 +78,14 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS = 1004, + VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER = 1005, + VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL = 1006, ++ VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE = 1007, ++ VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION = 1008, ++ VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE = 1009, + + VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, + VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, ++ VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK = 1302, ++ VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE = 1303, + + VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND = 2000, + VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE = 2001, +@@ -88,6 +93,8 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_IDX_UNSUPPORTED = 2003, + VKD3D_SHADER_ERROR_SPV_STENCIL_EXPORT_UNSUPPORTED = 2004, + ++ VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300, ++ + VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, + VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, + 
VKD3D_SHADER_ERROR_RS_INVALID_ROOT_PARAMETER_TYPE = 3002, +@@ -140,6 +147,7 @@ enum vkd3d_shader_error + VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE = 5302, + VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, + VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, ++ VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, + + VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, + +@@ -1012,6 +1020,11 @@ static inline bool vkd3d_shader_register_is_patch_constant(const struct vkd3d_sh + return reg->type == VKD3DSPR_PATCHCONST; + } + ++static inline bool register_is_constant(const struct vkd3d_shader_register *reg) ++{ ++ return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); ++} ++ + struct vkd3d_shader_location + { + const char *source_name; +@@ -1310,6 +1323,30 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( + } + } + ++static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resource_data_type( ++ enum vkd3d_shader_resource_data_type data_type) ++{ ++ switch (data_type) ++ { ++ case VKD3D_SHADER_RESOURCE_DATA_FLOAT: ++ case VKD3D_SHADER_RESOURCE_DATA_UNORM: ++ case VKD3D_SHADER_RESOURCE_DATA_SNORM: ++ return VKD3D_SHADER_COMPONENT_FLOAT; ++ case VKD3D_SHADER_RESOURCE_DATA_UINT: ++ return VKD3D_SHADER_COMPONENT_UINT; ++ case VKD3D_SHADER_RESOURCE_DATA_INT: ++ return VKD3D_SHADER_COMPONENT_INT; ++ case VKD3D_SHADER_RESOURCE_DATA_DOUBLE: ++ case VKD3D_SHADER_RESOURCE_DATA_CONTINUED: ++ return VKD3D_SHADER_COMPONENT_DOUBLE; ++ default: ++ FIXME("Unhandled data type %#x.\n", data_type); ++ /* fall-through */ ++ case VKD3D_SHADER_RESOURCE_DATA_MIXED: ++ return VKD3D_SHADER_COMPONENT_UINT; ++ } ++} ++ + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, + unsigned int index); + +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c +index 8b5f7899cf3..42a98763438 100644 +--- a/libs/vkd3d/libs/vkd3d/command.c ++++ 
b/libs/vkd3d/libs/vkd3d/command.c +@@ -3225,7 +3225,10 @@ static void command_list_add_descriptor_heap(struct d3d12_command_list *list, st + { + /* Descriptors can be written after binding. */ + FIXME("Flushing descriptor updates while list %p is not closed.\n", list); +- command_list_flush_vk_heap_updates(list); ++ vkd3d_mutex_lock(&heap->vk_sets_mutex); ++ d3d12_desc_flush_vk_heap_updates_locked(heap, list->device); ++ vkd3d_mutex_unlock(&heap->vk_sets_mutex); ++ return; + } + list->descriptor_heaps[list->descriptor_heap_count++] = heap; + } +-- +2.40.1 + diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-f649db23a596c1865bc7f110ca1feb38684.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-f649db23a596c1865bc7f110ca1feb38684.patch deleted file mode 100644 index e187d68d..00000000 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-f649db23a596c1865bc7f110ca1feb38684.patch +++ /dev/null @@ -1,458 +0,0 @@ -From 0d536e339fc3dcc3d90ef593818e2f6af63301d7 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 4 Aug 2023 19:27:19 +1000 -Subject: [PATCH] Updated vkd3d to f649db23a596c1865bc7f110ca1feb3868451375. - ---- - libs/vkd3d/include/vkd3d_shader.h | 107 +++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 8 +- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 1 + - libs/vkd3d/libs/vkd3d-shader/ir.c | 73 +++++++++++- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 16 ++- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 38 +++++++ - .../libs/vkd3d-shader/vkd3d_shader_private.h | 11 +- - 7 files changed, 245 insertions(+), 9 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index d6653d18e56..e98aad4fe95 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -90,6 +90,11 @@ enum vkd3d_shader_structure_type - * \since 1.9 - */ - VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO, -+ /** -+ * The structure is a vkd3d_shader_next_stage_info structure. 
-+ * \since 1.9 -+ */ -+ VKD3D_SHADER_STRUCTURE_TYPE_NEXT_STAGE_INFO, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), - }; -@@ -1676,6 +1681,76 @@ struct vkd3d_shader_scan_signature_info - struct vkd3d_shader_signature patch_constant; - }; - -+/** -+ * Describes the mapping of a output varying register in a shader stage, -+ * to an input varying register in the following shader stage. -+ * -+ * This structure is used in struct vkd3d_shader_next_stage_info. -+ */ -+struct vkd3d_shader_varying_map -+{ -+ /** -+ * The signature index (in the output signature) of the output varying. -+ * If greater than or equal to the number of elements in the output -+ * signature, signifies that the varying is consumed by the next stage but -+ * not written by this one. -+ */ -+ unsigned int output_signature_index; -+ /** The register index of the input varying to map this register to. */ -+ unsigned int input_register_index; -+ /** The mask consumed by the destination register. */ -+ unsigned int input_mask; -+}; -+ -+/** -+ * A chained structure which describes the next shader in the pipeline. -+ * -+ * This structure is optional, and should only be provided if there is in fact -+ * another shader in the pipeline. -+ * However, depending on the input and output formats, this structure may be -+ * necessary in order to generate shaders which correctly match each other. -+ * If the structure or its individual fields are not provided, vkd3d-shader -+ * will generate shaders which may be correct in isolation, but are not -+ * guaranteed to correctly match each other. -+ * -+ * This structure is passed to vkd3d_shader_compile() and extends -+ * vkd3d_shader_compile_info. -+ * -+ * This structure contains only input parameters. -+ * -+ * \since 1.9 -+ */ -+struct vkd3d_shader_next_stage_info -+{ -+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_NEXT_STAGE_INFO. */ -+ enum vkd3d_shader_structure_type type; -+ /** Optional pointer to a structure containing further parameters. 
*/ -+ const void *next; -+ -+ /** -+ * A mapping of output varyings in this shader stage to input varyings -+ * in the next shader stage. -+ * -+ * This mapping should include exactly one element for each varying -+ * consumed by the next shader stage. -+ * If this shader stage outputs a varying that is not consumed by the next -+ * shader stage, that varying should be absent from this array. -+ * -+ * If this field is absent, vkd3d-shader will map varyings from one stage -+ * to another based on their register index. -+ * For Direct3D shader model 3.0, such a default mapping will be incorrect -+ * unless the registers are allocated in the same order, and hence this -+ * field is necessary to correctly match inter-stage varyings. -+ * This mapping may also be necessary under other circumstances where the -+ * varying interface does not match exactly. -+ * -+ * This mapping may be constructed by vkd3d_shader_build_varying_map(). -+ */ -+ const struct vkd3d_shader_varying_map *varying_map; -+ /** The number of registers provided in \ref varying_map. */ -+ unsigned int varying_count; -+}; -+ - #ifdef LIBVKD3D_SHADER_SOURCE - # define VKD3D_SHADER_API VKD3D_EXPORT - #else -@@ -1748,13 +1823,14 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported - * - * Depending on the source and target types, this function may support the - * following chained structures: -+ * - vkd3d_shader_hlsl_source_info - * - vkd3d_shader_interface_info -+ * - vkd3d_shader_next_stage_info - * - vkd3d_shader_scan_descriptor_info - * - vkd3d_shader_scan_signature_info - * - vkd3d_shader_spirv_domain_shader_target_info - * - vkd3d_shader_spirv_target_info - * - vkd3d_shader_transform_feedback_info -- * - vkd3d_shader_hlsl_source_info - * - * \param compile_info A chained structure containing compilation parameters. 
- * -@@ -2188,6 +2264,35 @@ VKD3D_SHADER_API int vkd3d_shader_serialize_dxbc(size_t section_count, - */ - VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info); - -+/** -+ * Build a mapping of output varyings in a shader stage to input varyings in -+ * the following shader stage. -+ * -+ * This mapping should be used in struct vkd3d_shader_next_stage_info to -+ * compile the first shader. -+ * -+ * \param output_signature The output signature of the first shader. -+ * -+ * \param input_signature The input signature of the second shader. -+ * -+ * \param count On output, contains the number of entries written into -+ * \ref varyings. -+ * -+ * \param varyings Pointer to an output array of varyings. -+ * This must point to space for N varyings, where N is the number of elements -+ * in the input signature. -+ * -+ * \remark Valid legacy Direct3D pixel shaders have at most 12 varying inputs: -+ * 10 inter-stage varyings, face, and position. -+ * Therefore, in practice, it is safe to call this function with a -+ * pre-allocated array with a fixed size of 12. -+ * -+ * \since 1.9 -+ */ -+VKD3D_SHADER_API void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, -+ const struct vkd3d_shader_signature *input_signature, -+ unsigned int *count, struct vkd3d_shader_varying_map *varyings); -+ - #endif /* VKD3D_SHADER_NO_PROTOTYPES */ - - /** Type of vkd3d_shader_get_version(). 
*/ -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index fe739339bd1..35e5c454d57 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -524,6 +524,8 @@ static struct signature_element *find_signature_element_by_register_index( - return NULL; - } - -+#define SM1_COLOR_REGISTER_OFFSET 8 -+ - static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, - const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, - unsigned int register_index, bool is_dcl, unsigned int mask) -@@ -555,6 +557,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp - element->sysval_semantic = sysval; - element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; - element->register_index = register_index; -+ element->target_location = register_index; - element->register_count = 1; - element->mask = mask; - element->used_mask = is_dcl ? 0 : mask; -@@ -606,7 +609,7 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * - return true; - } - return add_signature_element(sm1, false, "COLOR", register_index, -- VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); - - case VKD3DSPR_TEXTURE: - /* For vertex shaders, this is ADDR. 
*/ -@@ -633,6 +636,9 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * - /* fall through */ - - case VKD3DSPR_ATTROUT: -+ return add_signature_element(sm1, true, "COLOR", register_index, -+ VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); -+ - case VKD3DSPR_COLOROUT: - return add_signature_element(sm1, true, "COLOR", register_index, - VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index 716b7bdb721..cedc3da4a83 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -391,6 +391,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s - read_dword(&ptr, &e[i].sysval_semantic); - read_dword(&ptr, &e[i].component_type); - read_dword(&ptr, &e[i].register_index); -+ e[i].target_location = e[i].register_index; - e[i].register_count = 1; - read_dword(&ptr, &mask); - e[i].mask = mask & 0xff; -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index d74f81afc39..705905f7888 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -85,6 +85,72 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i - shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); - } - -+static const struct vkd3d_shader_varying_map *find_varying_map( -+ const struct vkd3d_shader_next_stage_info *next_stage, unsigned int signature_idx) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < next_stage->varying_count; ++i) -+ { -+ if (next_stage->varying_map[i].output_signature_index == signature_idx) -+ return &next_stage->varying_map[i]; -+ } -+ -+ return NULL; -+} -+ -+static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info) -+{ -+ struct shader_signature 
*signature = &parser->shader_desc.output_signature; -+ const struct vkd3d_shader_next_stage_info *next_stage; -+ unsigned int i; -+ -+ if (!(next_stage = vkd3d_find_struct(compile_info->next, NEXT_STAGE_INFO))) -+ return VKD3D_OK; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ const struct vkd3d_shader_varying_map *map = find_varying_map(next_stage, i); -+ struct signature_element *e = &signature->elements[i]; -+ -+ if (map) -+ { -+ unsigned int input_mask = map->input_mask; -+ -+ e->target_location = map->input_register_index; -+ -+ /* It is illegal in Vulkan if the next shader uses the same varying -+ * location with a different mask. */ -+ if (input_mask && input_mask != e->mask) -+ { -+ vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Aborting due to not yet implemented feature: " -+ "Output mask %#x does not match input mask %#x.", -+ e->mask, input_mask); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ } -+ else -+ { -+ e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; -+ } -+ } -+ -+ for (i = 0; i < next_stage->varying_count; ++i) -+ { -+ if (next_stage->varying_map[i].output_signature_index >= signature->element_count) -+ { -+ vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Aborting due to not yet implemented feature: " -+ "The next stage consumes varyings not written by this stage."); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ - struct hull_flattener - { - struct vkd3d_shader_instruction_array instructions; -@@ -1194,7 +1260,8 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vkd3d - return VKD3D_OK; - } - --enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser) -+enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info) - { - struct vkd3d_shader_instruction_array *instructions = &parser->instructions; - enum 
vkd3d_result result = VKD3D_OK; -@@ -1202,6 +1269,10 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser) - if (parser->shader_desc.is_dxil) - return result; - -+ if (parser->shader_version.type != VKD3D_SHADER_TYPE_PIXEL -+ && (result = remap_output_signature(parser, compile_info)) < 0) -+ return result; -+ - if (parser->shader_version.type == VKD3D_SHADER_TYPE_HULL - && (result = instruction_array_flatten_hull_shader_phases(instructions)) >= 0) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index d71f0a698d9..2725ed80cd1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -4602,7 +4602,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, - } - else - { -- unsigned int location = signature_element->register_index; -+ unsigned int location = signature_element->target_location; - - input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - storage_class, component_type, input_component_count, array_sizes, 2); -@@ -4978,9 +4978,15 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - - spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); - } -+ else if (signature_element->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ { -+ storage_class = SpvStorageClassPrivate; -+ id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -+ storage_class, component_type, output_component_count, array_sizes, 2); -+ } - else - { -- unsigned int location = signature_element->register_index; -+ unsigned int location = signature_element->target_location; - - if (is_patch_constant) - location += shader_signature_next_location(&compiler->output_signature); -@@ -4989,10 +4995,10 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - storage_class, component_type, output_component_count, array_sizes, 2); - 
vkd3d_spirv_add_iface_variable(builder, id); - -- if (is_dual_source_blending(compiler) && signature_element->register_index < 2) -+ if (is_dual_source_blending(compiler) && location < 2) - { - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); -- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, signature_element->register_index); -+ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, location); - } - else - { -@@ -9542,7 +9548,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - compiler->location.column = 0; - compiler->location.line = 1; - -- if ((result = vkd3d_shader_normalise(parser)) < 0) -+ if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) - return result; - - instructions = parser->instructions; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index d59cd704ceb..512d9ea41e7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -1891,3 +1891,41 @@ void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *ins - vkd3d_free(instructions->icbs[i]); - vkd3d_free(instructions->icbs); - } -+ -+void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, -+ const struct vkd3d_shader_signature *input_signature, -+ unsigned int *ret_count, struct vkd3d_shader_varying_map *varyings) -+{ -+ unsigned int count = 0; -+ unsigned int i; -+ -+ TRACE("output_signature %p, input_signature %p, ret_count %p, varyings %p.\n", -+ output_signature, input_signature, ret_count, varyings); -+ -+ for (i = 0; i < input_signature->element_count; ++i) -+ { -+ const struct vkd3d_shader_signature_element *input_element, *output_element; -+ -+ input_element = &input_signature->elements[i]; -+ -+ if (input_element->sysval_semantic != VKD3D_SHADER_SV_NONE) -+ continue; -+ -+ varyings[count].input_register_index = 
input_element->register_index; -+ varyings[count].input_mask = input_element->mask; -+ -+ if ((output_element = vkd3d_shader_find_signature_element(output_signature, -+ input_element->semantic_name, input_element->semantic_index, 0))) -+ { -+ varyings[count].output_signature_index = output_element - output_signature->elements; -+ } -+ else -+ { -+ varyings[count].output_signature_index = output_signature->element_count; -+ } -+ -+ ++count; -+ } -+ -+ *ret_count = count; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index d35f49a63a2..dc43175d4b5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -168,6 +168,8 @@ enum vkd3d_shader_error - VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH = 8302, - VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH = 8303, - VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS = 8304, -+ -+ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, - }; - - enum vkd3d_shader_opcode -@@ -807,6 +809,8 @@ enum vkd3d_shader_input_sysval_semantic - VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, - }; - -+#define SIGNATURE_TARGET_LOCATION_UNUSED (~0u) -+ - struct signature_element - { - unsigned int sort_index; -@@ -815,11 +819,15 @@ struct signature_element - unsigned int stream_index; - enum vkd3d_shader_sysval_semantic sysval_semantic; - enum vkd3d_shader_component_type component_type; -+ /* Register index in the source shader. */ - unsigned int register_index; - unsigned int register_count; - unsigned int mask; - unsigned int used_mask; - enum vkd3d_shader_minimum_precision min_precision; -+ /* Register index / location in the target shader. -+ * If SIGNATURE_TARGET_LOCATION_UNUSED, this element should not be written. 
*/ -+ unsigned int target_location; - }; - - struct shader_signature -@@ -1406,6 +1414,7 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void - void dxbc_writer_init(struct dxbc_writer *dxbc); - int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); - --enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser); -+enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info); - - #endif /* __VKD3D_SHADER_PRIVATE_H */ --- -2.40.1 - diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-4f2e07a45d0cdb82b1cbba0cfe95c87a697.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-4f2e07a45d0cdb82b1cbba0cfe95c87a697.patch deleted file mode 100644 index 7ea60aca..00000000 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-4f2e07a45d0cdb82b1cbba0cfe95c87a697.patch +++ /dev/null @@ -1,3610 +0,0 @@ -From c5de2391c76b56a016df7907ce484035f1ace2b6 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Sat, 19 Aug 2023 10:47:49 +1000 -Subject: [PATCH] Updated vkd3d to 4f2e07a45d0cdb82b1cbba0cfe95c87a69799865. 
- ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 4 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 18 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 11 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 61 +- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 128 ++- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 170 +++- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 18 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 865 ++++++++++-------- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 189 ++-- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 25 +- - 10 files changed, 907 insertions(+), 582 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 35e5c454d57..99a5bd7a438 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1686,7 +1686,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - else - { - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); -- put_u32(buffer, var->regs[r].bind_count); -+ put_u32(buffer, var->bind_count[r]); - } - put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* FIXME: default value */ -@@ -2033,7 +2033,7 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) - continue; - -- count = var->regs[HLSL_REGSET_SAMPLERS].bind_count; -+ count = var->bind_count[HLSL_REGSET_SAMPLERS]; - - for (i = 0; i < count; ++i) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 4ed7712b0aa..8b706e1e667 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -1544,7 +1544,7 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, - hlsl_block_cleanup(dst_block); - return false; - } -- list_add_tail(&dst_block->instrs, &dst->entry); -+ hlsl_block_add_instr(dst_block, dst); - - if (!list_empty(&src->uses)) - { -@@ -2244,11 +2244,11 @@ const char 
*hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) - - static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr); - --static void dump_instr_list(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct list *list) -+static void dump_block(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_block *block) - { - struct hlsl_ir_node *instr; - -- LIST_FOR_EACH_ENTRY(instr, list, struct hlsl_ir_node, entry) -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - dump_instr(ctx, buffer, instr); - vkd3d_string_buffer_printf(buffer, "\n"); -@@ -2490,9 +2490,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - vkd3d_string_buffer_printf(buffer, "if ("); - dump_src(buffer, &if_node->condition); - vkd3d_string_buffer_printf(buffer, ") {\n"); -- dump_instr_list(ctx, buffer, &if_node->then_block.instrs); -+ dump_block(ctx, buffer, &if_node->then_block); - vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); -- dump_instr_list(ctx, buffer, &if_node->else_block.instrs); -+ dump_block(ctx, buffer, &if_node->else_block); - vkd3d_string_buffer_printf(buffer, " %10s }", ""); - } - -@@ -2525,7 +2525,7 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i - static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) - { - vkd3d_string_buffer_printf(buffer, "for (;;) {\n"); -- dump_instr_list(ctx, buffer, &loop->body.instrs); -+ dump_block(ctx, buffer, &loop->body); - vkd3d_string_buffer_printf(buffer, " %10s }", ""); - } - -@@ -2713,7 +2713,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl - vkd3d_string_buffer_printf(&buffer, "\n"); - } - if (func->has_body) -- dump_instr_list(ctx, &buffer, &func->body.instrs); -+ dump_block(ctx, &buffer, &func->body); - - vkd3d_string_buffer_trace(&buffer); - 
vkd3d_string_buffer_cleanup(&buffer); -@@ -2922,7 +2922,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) - - for (i = 0; i < attr->args_count; ++i) - hlsl_src_remove(&attr->args[i]); -- hlsl_free_instr_list(&attr->instrs); -+ hlsl_block_cleanup(&attr->instrs); - vkd3d_free((void *)attr->name); - vkd3d_free(attr); - } -@@ -3377,6 +3377,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - struct hlsl_type *type, *next_type; - unsigned int i; - -+ hlsl_block_cleanup(&ctx->static_initializers); -+ - for (i = 0; i < ctx->source_files_count; ++i) - vkd3d_free((void *)ctx->source_files[i]); - vkd3d_free(ctx->source_files); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index b1928312066..0a8d3a692a3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -257,7 +257,7 @@ struct hlsl_reg - /* Number of registers to be allocated. - * Unlike the variable's type's regsize, it is not expressed in register components, but rather - * in whole registers, and may depend on which components are used within the shader. */ -- uint32_t bind_count; -+ uint32_t allocation_size; - /* For numeric registers, a writemask can be provided to indicate the reservation of only some - * of the 4 components. */ - unsigned int writemask; -@@ -337,7 +337,7 @@ struct hlsl_src - struct hlsl_attribute - { - const char *name; -- struct list instrs; -+ struct hlsl_block instrs; - struct vkd3d_shader_location loc; - unsigned int args_count; - struct hlsl_src args[]; -@@ -417,6 +417,9 @@ struct hlsl_ir_var - enum hlsl_sampler_dim sampler_dim; - struct vkd3d_shader_location first_sampler_dim_loc; - } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; -+ /* Minimum number of binds required to include all object components actually used in the shader. -+ * It may be less than the allocation size, e.g. for texture arrays. 
*/ -+ unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1]; - - uint32_t is_input_semantic : 1; - uint32_t is_output_semantic : 1; -@@ -1150,7 +1153,7 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls - const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); --struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, -+struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc); - - struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); -@@ -1251,7 +1254,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun - bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, - const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); - bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); -+ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); - int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); - - int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 0695f7864bf..29e0ff0c5be 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -138,14 +138,6 @@ static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) - return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); - 
} - --static struct list *block_to_list(struct hlsl_block *block) --{ -- /* This is a temporary hack to ease the transition from lists to blocks. -- * It takes advantage of the fact that an allocated hlsl_block pointer is -- * byte-compatible with an allocated list pointer. */ -- return &block->instrs; --} -- - static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) - { - struct hlsl_block *block; -@@ -351,7 +343,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl - - dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); - -- if (!(component_load = hlsl_add_load_component(ctx, block_to_list(block), node, src_idx, loc))) -+ if (!(component_load = hlsl_add_load_component(ctx, block, node, src_idx, loc))) - return NULL; - - if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) -@@ -677,11 +669,11 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, - return true; - } - --struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -- unsigned int comp, const struct vkd3d_shader_location *loc) -+struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *load, *store; -- struct hlsl_block block; -+ struct hlsl_block load_block; - struct hlsl_ir_var *var; - struct hlsl_deref src; - -@@ -690,12 +682,12 @@ struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list * - - if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) - return NULL; -- list_add_tail(instrs, &store->entry); -+ hlsl_block_add_instr(block, store); - - hlsl_init_simple_deref_from_var(&src, var); -- if (!(load = hlsl_new_load_component(ctx, &block, &src, comp, loc))) -+ if (!(load = hlsl_new_load_component(ctx, &load_block, &src, comp, loc))) - return NULL; -- 
list_move_tail(instrs, &block.instrs); -+ hlsl_block_add_block(block, &load_block); - - return load; - } -@@ -1340,7 +1332,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl - { - if (operands[j]) - { -- if (!(load = hlsl_add_load_component(ctx, block_to_list(block), operands[j], i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, block, operands[j], i, loc))) - return NULL; - - cell_operands[j] = load; -@@ -1822,7 +1814,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - return NULL; - hlsl_block_add_instr(block, cell); - -- if (!(load = hlsl_add_load_component(ctx, block_to_list(block), rhs, k++, &rhs->loc))) -+ if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) - return NULL; - - if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) -@@ -1911,7 +1903,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - struct hlsl_type *dst_comp_type; - struct hlsl_block block; - -- if (!(load = hlsl_add_load_component(ctx, block_to_list(instrs), src, k, &src->loc))) -+ if (!(load = hlsl_add_load_component(ctx, instrs, src, k, &src->loc))) - return; - - dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); -@@ -2139,6 +2131,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - if (var->semantic.name) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Semantics are not allowed on local variables."); -+ -+ if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count && !(modifiers & HLSL_STORAGE_STATIC)) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, -+ "Const variable \"%s\" is missing an initializer.", var->name); -+ } - } - - if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) -@@ -2148,15 +2146,6 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - 
"Static variables cannot have both numeric and resource components."); - } - -- if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count -- && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, -- "Const variable \"%s\" is missing an initializer.", var->name); -- hlsl_free_var(var); -- return; -- } -- - if (!hlsl_add_var(ctx, var, local)) - { - struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); -@@ -2469,7 +2458,7 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { -- if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) - return false; - - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) -@@ -2513,7 +2502,7 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { -- if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) - return false; - - if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) -@@ -3170,11 +3159,11 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *value1, *value2, *mul; - -- if (!(value1 = hlsl_add_load_component(ctx, block_to_list(params->instrs), -+ if (!(value1 = hlsl_add_load_component(ctx, params->instrs, - cast1, j * cast1->data_type->dimx + k, loc))) - return false; - -- if (!(value2 = hlsl_add_load_component(ctx, block_to_list(params->instrs), -+ if (!(value2 = hlsl_add_load_component(ctx, params->instrs, - cast2, k * cast2->data_type->dimx + i, loc))) - return false; - -@@ -3531,7 +3520,7 @@ static bool intrinsic_transpose(struct 
hlsl_ctx *ctx, - { - struct hlsl_block block; - -- if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, j * arg->data_type->dimx + i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) - return false; - - if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) -@@ -4193,7 +4182,7 @@ static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_bloc - if (!dest) - return true; - -- if (!(load = hlsl_add_load_component(ctx, block_to_list(instrs), src, component, loc))) -+ if (!(load = hlsl_add_load_component(ctx, instrs, src, component, loc))) - return false; - - if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load)) -@@ -5009,7 +4998,7 @@ attribute: - YYABORT; - } - $$->name = $2; -- list_init(&$$->instrs); -+ hlsl_block_init(&$$->instrs); - $$->loc = @$; - $$->args_count = 0; - } -@@ -5024,8 +5013,8 @@ attribute: - YYABORT; - } - $$->name = $2; -- list_init(&$$->instrs); -- list_move_tail(&$$->instrs, &$4.instrs->instrs); -+ hlsl_block_init(&$$->instrs); -+ hlsl_block_add_block(&$$->instrs, $4.instrs); - vkd3d_free($4.instrs); - $$->loc = @$; - $$->args_count = $4.args_count; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 4f5a5b02a67..bfa605f4ba7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -161,7 +161,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der - /* Split uniforms into two variables representing the constant and temp - * registers, and copy the former to the latter, so that writes to uniforms - * work. 
*/ --static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *temp) -+static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *temp) - { - struct vkd3d_string_buffer *name; - struct hlsl_ir_var *uniform; -@@ -188,7 +188,7 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru - - if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) - return; -- list_add_head(instrs, &load->node.entry); -+ list_add_head(&block->instrs, &load->node.entry); - - if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) - return; -@@ -301,7 +301,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - return ext_var; - } - --static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, -+static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, - unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; -@@ -364,7 +364,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct - } - } - --static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, -+static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, - unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct vkd3d_shader_location *loc = &lhs->node.loc; -@@ -406,30 +406,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs - return; - list_add_after(&c->entry, &element_load->node.entry); - -- prepend_input_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); -+ prepend_input_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); - } - } 
- else - { -- prepend_input_copy(ctx, instrs, lhs, modifiers, semantic, semantic_index); -+ prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); - } - } - - /* Split inputs into two variables representing the semantic and temp registers, - * and copy the former to the latter, so that writes to input variables work. */ --static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) -+static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) - { - struct hlsl_ir_load *load; - - /* This redundant load is expected to be deleted later by DCE. */ - if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) - return; -- list_add_head(instrs, &load->node.entry); -+ list_add_head(&block->instrs, &load->node.entry); - -- prepend_input_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); - } - --static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, -+static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, - unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct hlsl_type *type = rhs->node.data_type, *vector_type; -@@ -464,11 +464,11 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct - { - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; -- list_add_tail(instrs, &c->entry); -+ hlsl_block_add_instr(block, c); - - if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) - return; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - } - else - { -@@ -476,16 +476,16 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct - - if (!(load = hlsl_new_load_index(ctx, &rhs->src, 
NULL, &var->loc))) - return; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - } - - if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) - return; -- list_add_tail(instrs, &store->entry); -+ hlsl_block_add_instr(block, store); - } - } - --static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, -+static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, - unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct vkd3d_shader_location *loc = &rhs->node.loc; -@@ -520,34 +520,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs - - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; -- list_add_tail(instrs, &c->entry); -+ hlsl_block_add_instr(block, c); - - if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) - return; -- list_add_tail(instrs, &element_load->node.entry); -+ hlsl_block_add_instr(block, &element_load->node); - -- append_output_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); -+ append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); - } - } - else - { -- append_output_copy(ctx, instrs, rhs, modifiers, semantic, semantic_index); -+ append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); - } - } - - /* Split outputs into two variables representing the temp and semantic - * registers, and copy the former to the latter, so that reads from output - * variables work. */ --static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) -+static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) - { - struct hlsl_ir_load *load; - - /* This redundant load is expected to be deleted later by DCE. 
*/ - if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) - return; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - -- append_output_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); - } - - bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), -@@ -2191,6 +2191,44 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - return true; - } - -+static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl_ir_var *to_add, -+ enum hlsl_regset regset) -+{ -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(var, list, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->bind_count[regset] < to_add->bind_count[regset]) -+ { -+ list_add_before(&var->extern_entry, &to_add->extern_entry); -+ return; -+ } -+ } -+ -+ list_add_tail(list, &to_add->extern_entry); -+} -+ -+static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) -+{ -+ struct list separated_resources; -+ struct hlsl_ir_var *var, *next; -+ -+ list_init(&separated_resources); -+ -+ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_separated_resource) -+ { -+ list_remove(&var->extern_entry); -+ insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_TEXTURES); -+ } -+ } -+ -+ list_move_head(&ctx->extern_vars, &separated_resources); -+ -+ return false; -+} -+ - /* Lower DIV to RCP + MUL. 
*/ - static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -@@ -2738,7 +2776,7 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - count = hlsl_type_component_count(cmp_type); - for (i = 0; i < count; ++i) - { -- if (!(load = hlsl_add_load_component(ctx, &block.instrs, cmp, i, &instr->loc))) -+ if (!(load = hlsl_add_load_component(ctx, &block, cmp, i, &instr->loc))) - return false; - - if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) -@@ -2868,7 +2906,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - continue; - regset = hlsl_type_get_regset(var->data_type); - -- if (var->reg_reservation.reg_type && var->regs[regset].bind_count) -+ if (var->reg_reservation.reg_type && var->regs[regset].allocation_size) - { - if (var->reg_reservation.reg_type != get_regset_name(regset)) - { -@@ -2886,7 +2924,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - var->regs[regset].id = var->reg_reservation.reg_index; - TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, - var->reg_reservation.reg_index, var->reg_reservation.reg_type, -- var->reg_reservation.reg_index + var->regs[regset].bind_count); -+ var->reg_reservation.reg_index + var->regs[regset].allocation_size); - } - } - } -@@ -3144,7 +3182,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a - record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); - - ret.id = reg_idx; -- ret.bind_count = 1; -+ ret.allocation_size = 1; - ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); - ret.allocated = true; - return ret; -@@ -3180,7 +3218,7 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo - record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); - - ret.id = reg_idx; -- ret.bind_count = align(reg_size, 4) / 4; 
-+ ret.allocation_size = align(reg_size, 4) / 4; - ret.allocated = true; - return ret; - } -@@ -3275,6 +3313,7 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n - return false; - - var->objects_usage[regset][index].used = true; -+ var->bind_count[regset] = max(var->bind_count[regset], index + 1); - if (load->sampler.var) - { - var = load->sampler.var; -@@ -3282,6 +3321,7 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n - return false; - - var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; -+ var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); - } - - return false; -@@ -3291,7 +3331,7 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) - { - struct hlsl_ir_var *var; - struct hlsl_type *type; -- unsigned int i, k; -+ unsigned int k; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -@@ -3299,15 +3339,10 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) - - for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) - { -- for (i = 0; i < type->reg_size[k]; ++i) -- { -- bool is_separated = var->is_separated_resource; -+ bool is_separated = var->is_separated_resource; - -- /* Samplers (and textures separated from them) are only allocated until the last -- * used one. */ -- if (var->objects_usage[k][i].used) -- var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS || is_separated) ? i + 1 : type->reg_size[k]; -- } -+ if (var->bind_count[k] > 0) -+ var->regs[k].allocation_size = (k == HLSL_REGSET_SAMPLERS || is_separated) ? 
var->bind_count[k] : type->reg_size[k]; - } - } - } -@@ -3613,7 +3648,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - { - var->regs[HLSL_REGSET_NUMERIC].allocated = true; - var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; -- var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; -+ var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; - var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; - TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', - var->regs[HLSL_REGSET_NUMERIC], var->data_type)); -@@ -3792,7 +3827,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - } - - buffer->reg.id = buffer->reservation.reg_index; -- buffer->reg.bind_count = 1; -+ buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; - TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); - } -@@ -3802,7 +3837,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - ++index; - - buffer->reg.id = index; -- buffer->reg.bind_count = 1; -+ buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; - TRACE("Allocated %s to cb%u.\n", buffer->name, index); - ++index; -@@ -3842,7 +3877,7 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - else if (var->regs[regset].allocated) - { - start = var->regs[regset].id; -- count = var->regs[regset].bind_count; -+ count = var->regs[regset].allocation_size; - } - else - { -@@ -3873,7 +3908,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- unsigned int count = var->regs[regset].bind_count; -+ unsigned int count = var->regs[regset].allocation_size; - - if (count == 0) - continue; -@@ -4221,7 +4256,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) - { - if (var->storage_modifiers & 
HLSL_STORAGE_UNIFORM) -- prepend_uniform_copy(ctx, &body->instrs, var); -+ prepend_uniform_copy(ctx, body, var); - } - - for (i = 0; i < entry_func->parameters.count; ++i) -@@ -4230,7 +4265,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - - if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) - { -- prepend_uniform_copy(ctx, &body->instrs, var); -+ prepend_uniform_copy(ctx, body, var); - } - else - { -@@ -4246,9 +4281,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - } - - if (var->storage_modifiers & HLSL_STORAGE_IN) -- prepend_input_var_copy(ctx, &body->instrs, var); -+ prepend_input_var_copy(ctx, body, var); - if (var->storage_modifiers & HLSL_STORAGE_OUT) -- append_output_var_copy(ctx, &body->instrs, var); -+ append_output_var_copy(ctx, body, var); - } - } - if (entry_func->return_var) -@@ -4257,7 +4292,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); - -- append_output_var_copy(ctx, &body->instrs, entry_func->return_var); -+ append_output_var_copy(ctx, body, entry_func->return_var); - } - - for (i = 0; i < entry_func->attr_count; ++i) -@@ -4316,6 +4351,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - if (profile->major_version >= 4) - hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); - hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); -+ sort_synthetic_separated_samplers_first(ctx); - - if (profile->major_version < 4) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 01c438ae212..41a72ab6c0d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c 
-@@ -152,6 +152,51 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - return true; - } - -+static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, -+ "Indefinite logarithm result."); -+ } -+ dst->u[k].f = log2f(src->value.u[k].f); -+ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, -+ "Infinities and NaNs are not allowed by the shader model."); -+ } -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (src->value.u[k].d < 0.0) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, -+ "Indefinite logarithm result."); -+ } -+ dst->u[k].d = log2(src->value.u[k].d); -+ break; -+ -+ default: -+ FIXME("Fold 'log2' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { -@@ -194,7 +239,7 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - - assert(type == src->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { -@@ -231,6 +276,51 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - return true; - } - -+static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct 
hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, -+ "Imaginary square root result."); -+ } -+ dst->u[k].f = sqrtf(src->value.u[k].f); -+ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, -+ "Infinities and NaNs are not allowed by the shader model."); -+ } -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (src->value.u[k].d < 0.0) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, -+ "Imaginary square root result."); -+ } -+ dst->u[k].d = sqrt(src->value.u[k].d); -+ break; -+ -+ default: -+ FIXME("Fold 'sqrt' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -@@ -348,6 +438,64 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - return true; - } - -+static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); -+ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); -+ -+ dst->u[0].f = 0.0f; -+ for (k = 0; k < 
src1->node.data_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; -+ break; -+ default: -+ FIXME("Fold 'dot' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); -+ assert(type == src3->node.data_type->base_type); -+ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); -+ assert(src3->node.data_type->dimx == 1); -+ -+ dst->u[0].f = src3->value.u[0].f; -+ for (k = 0; k < src1->node.data_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; -+ break; -+ default: -+ FIXME("Fold 'dp2add' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, - const struct vkd3d_shader_location *loc) -@@ -723,7 +871,7 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - - bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -- struct hlsl_ir_constant *arg1, *arg2 = NULL; -+ struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; - struct hlsl_constant_value res = {0}; - struct hlsl_ir_node *res_node; - struct hlsl_ir_expr *expr; -@@ -751,6 +899,8 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct 
hlsl_ir_node *instr, - arg1 = hlsl_ir_constant(expr->operands[0].node); - if (expr->operands[1].node) - arg2 = hlsl_ir_constant(expr->operands[1].node); -+ if (expr->operands[2].node) -+ arg3 = hlsl_ir_constant(expr->operands[2].node); - - switch (expr->op) - { -@@ -762,6 +912,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - success = fold_cast(ctx, &res, instr->data_type, arg1); - break; - -+ case HLSL_OP1_LOG2: -+ success = fold_log2(ctx, &res, instr->data_type, arg1, &instr->loc); -+ break; -+ - case HLSL_OP1_NEG: - success = fold_neg(ctx, &res, instr->data_type, arg1); - break; -@@ -770,6 +924,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); - break; - -+ case HLSL_OP1_SQRT: -+ success = fold_sqrt(ctx, &res, instr->data_type, arg1, &instr->loc); -+ break; -+ - case HLSL_OP2_ADD: - success = fold_add(ctx, &res, instr->data_type, arg1, arg2); - break; -@@ -788,6 +946,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); - break; - -+ case HLSL_OP2_DOT: -+ success = fold_dot(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ - case HLSL_OP2_DIV: - success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); - break; -@@ -824,6 +986,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); - break; - -+ case HLSL_OP3_DP2ADD: -+ success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); -+ break; -+ - default: - FIXME("Fold \"%s\" expression.\n", debug_hlsl_expr_op(expr->op)); - success = false; -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 2725ed80cd1..fa605f185ae 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ 
b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -2310,7 +2310,7 @@ struct spirv_compiler - - uint32_t binding_idx; - -- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; - unsigned int input_control_point_count; - unsigned int output_control_point_count; - bool use_vocp; -@@ -2380,7 +2380,7 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) - - static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, -- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) - { - const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; -@@ -5695,13 +5695,13 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty - } - } - --static const struct vkd3d_shader_descriptor_info *spirv_compiler_get_descriptor_info( -+static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( - struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, - const struct vkd3d_shader_register_range *range) - { -- const struct vkd3d_shader_scan_descriptor_info *descriptor_info = compiler->scan_descriptor_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; - unsigned int register_last = (range->last == ~0u) ? 
range->first : range->last; -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - unsigned int i; - - for (i = 0; i < descriptor_info->descriptor_count; ++i) -@@ -5721,7 +5721,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler - bool raw_structured, uint32_t depth) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - bool uav_read, uav_atomics; - uint32_t sampled_type_id; - SpvImageFormat format; -@@ -5756,7 +5756,7 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi - const struct vkd3d_shader_combined_resource_sampler *current; - uint32_t image_type_id, type_id, ptr_type_id, var_id; - enum vkd3d_shader_binding_flag resource_type_flag; -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - struct vkd3d_symbol symbol; - unsigned int i; - bool depth; -@@ -5889,7 +5889,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - - if (is_uav) - { -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - - d = spirv_compiler_get_descriptor_info(compiler, - VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &resource->range); -@@ -9635,7 +9635,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - } - - int spirv_compile(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 351943e2e53..550f9b27cc7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ 
b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -430,6 +430,8 @@ enum vkd3d_sm4_register_type - VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, - VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, - VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, -+ -+ VKD3D_SM4_REGISTER_TYPE_COUNT, - }; - - enum vkd3d_sm4_extended_operand_type -@@ -571,6 +573,12 @@ struct sm4_index_range_array - struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; - }; - -+struct vkd3d_sm4_lookup_tables -+{ -+ const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; -+ const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; -+}; -+ - struct vkd3d_shader_sm4_parser - { - const uint32_t *start, *end, *ptr; -@@ -587,6 +595,8 @@ struct vkd3d_shader_sm4_parser - struct sm4_index_range_array output_index_ranges; - struct sm4_index_range_array patch_constant_index_ranges; - -+ struct vkd3d_sm4_lookup_tables lookup; -+ - struct vkd3d_shader_parser p; - }; - -@@ -1468,50 +1478,10 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] = - {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, - }; - --static const enum vkd3d_shader_register_type register_type_table[] = --{ -- /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, -- /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, -- /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, -- /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, -- /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, -- /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, -- /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, -- /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, -- /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, -- /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, -- /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, -- /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, -- /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, -- /* VKD3D_SM4_RT_OMASK */ 
VKD3DSPR_SAMPLEMASK, -- /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, -- /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, -- /* UNKNOWN */ ~0u, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, -- /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, -- /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, -- /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, -- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, -- /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, -- /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, -- /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, -- /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, -- /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, -- /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, -- /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, -- /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, -- /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, -- /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, -- /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, -- /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, -- /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, -+struct vkd3d_sm4_register_type_info -+{ -+ enum vkd3d_sm4_register_type sm4_type; -+ enum vkd3d_shader_register_type vkd3d_type; - }; - - static const enum vkd3d_shader_register_precision register_precision_table[] = -@@ -1524,18 +1494,104 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = - /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, - }; - -+struct tpf_writer -+{ -+ struct hlsl_ctx *ctx; -+ struct vkd3d_bytecode_buffer *buffer; -+ struct vkd3d_sm4_lookup_tables lookup; -+}; -+ - static const struct 
vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) - { - unsigned int i; - - for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) - { -- if (opcode == opcode_table[i].opcode) return &opcode_table[i]; -+ if (opcode == opcode_table[i].opcode) -+ return &opcode_table[i]; - } - - return NULL; - } - -+static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) -+{ -+ const struct vkd3d_sm4_register_type_info *info; -+ unsigned int i; -+ -+ static const struct vkd3d_sm4_register_type_info register_type_table[] = -+ { -+ {VKD3D_SM4_RT_TEMP, VKD3DSPR_TEMP}, -+ {VKD3D_SM4_RT_INPUT, VKD3DSPR_INPUT}, -+ {VKD3D_SM4_RT_OUTPUT, VKD3DSPR_OUTPUT}, -+ {VKD3D_SM4_RT_INDEXABLE_TEMP, VKD3DSPR_IDXTEMP}, -+ {VKD3D_SM4_RT_IMMCONST, VKD3DSPR_IMMCONST}, -+ {VKD3D_SM4_RT_IMMCONST64, VKD3DSPR_IMMCONST64}, -+ {VKD3D_SM4_RT_SAMPLER, VKD3DSPR_SAMPLER}, -+ {VKD3D_SM4_RT_RESOURCE, VKD3DSPR_RESOURCE}, -+ {VKD3D_SM4_RT_CONSTBUFFER, VKD3DSPR_CONSTBUFFER}, -+ {VKD3D_SM4_RT_IMMCONSTBUFFER, VKD3DSPR_IMMCONSTBUFFER}, -+ {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID}, -+ {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT}, -+ {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL}, -+ {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER}, -+ {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK}, -+ {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM}, -+ {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY}, -+ {VKD3D_SM5_RT_FUNCTION_POINTER, VKD3DSPR_FUNCTIONPOINTER}, -+ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID, VKD3DSPR_OUTPOINTID}, -+ {VKD3D_SM5_RT_FORK_INSTANCE_ID, VKD3DSPR_FORKINSTID}, -+ {VKD3D_SM5_RT_JOIN_INSTANCE_ID, VKD3DSPR_JOININSTID}, -+ {VKD3D_SM5_RT_INPUT_CONTROL_POINT, VKD3DSPR_INCONTROLPOINT}, -+ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT, VKD3DSPR_OUTCONTROLPOINT}, -+ {VKD3D_SM5_RT_PATCH_CONSTANT_DATA, VKD3DSPR_PATCHCONST}, -+ {VKD3D_SM5_RT_DOMAIN_LOCATION, VKD3DSPR_TESSCOORD}, -+ {VKD3D_SM5_RT_UAV, VKD3DSPR_UAV}, -+ {VKD3D_SM5_RT_SHARED_MEMORY, VKD3DSPR_GROUPSHAREDMEM}, -+ {VKD3D_SM5_RT_THREAD_ID, VKD3DSPR_THREADID}, -+ 
{VKD3D_SM5_RT_THREAD_GROUP_ID, VKD3DSPR_THREADGROUPID}, -+ {VKD3D_SM5_RT_LOCAL_THREAD_ID, VKD3DSPR_LOCALTHREADID}, -+ {VKD3D_SM5_RT_COVERAGE, VKD3DSPR_COVERAGE}, -+ {VKD3D_SM5_RT_LOCAL_THREAD_INDEX, VKD3DSPR_LOCALTHREADINDEX}, -+ {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID}, -+ {VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL, VKD3DSPR_DEPTHOUTGE}, -+ {VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL, VKD3DSPR_DEPTHOUTLE}, -+ {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF}, -+ }; -+ -+ memset(lookup, 0, sizeof(*lookup)); -+ -+ for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) -+ { -+ info = ®ister_type_table[i]; -+ lookup->register_type_info_from_sm4[info->sm4_type] = info; -+ lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; -+ } -+} -+ -+static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+{ -+ tpf->ctx = ctx; -+ tpf->buffer = buffer; -+ init_sm4_lookup_tables(&tpf->lookup); -+} -+ -+static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( -+ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) -+{ -+ if (sm4_type >= VKD3D_SM4_REGISTER_TYPE_COUNT) -+ return NULL; -+ return lookup->register_type_info_from_sm4[sm4_type]; -+} -+ -+static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_type( -+ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) -+{ -+ if (vkd3d_type >= VKD3DSPR_COUNT) -+ return NULL; -+ return lookup->register_type_info_from_vkd3d[vkd3d_type]; -+} -+ - static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) - { - switch (sm4->p.shader_version.type) -@@ -1642,6 +1698,7 @@ static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_typ - static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, - enum vkd3d_data_type data_type, struct 
vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) - { -+ const struct vkd3d_sm4_register_type_info *register_type_info; - enum vkd3d_sm4_register_precision precision; - enum vkd3d_sm4_register_type register_type; - enum vkd3d_sm4_extended_operand_type type; -@@ -1656,15 +1713,15 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui - token = *(*ptr)++; - - register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; -- if (register_type >= ARRAY_SIZE(register_type_table) -- || register_type_table[register_type] == VKD3DSPR_INVALID) -+ register_type_info = get_info_from_sm4_register_type(&priv->lookup, register_type); -+ if (!register_type_info) - { - FIXME("Unhandled register type %#x.\n", register_type); - param->type = VKD3DSPR_TEMP; - } - else - { -- param->type = register_type_table[register_type]; -+ param->type = register_type_info->vkd3d_type; - } - param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - param->non_uniform = false; -@@ -2364,6 +2421,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t - sm4->output_map[e->register_index] = e->semantic_index; - } - -+ init_sm4_lookup_tables(&sm4->lookup); -+ - return true; - } - -@@ -2502,7 +2561,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - return sm4->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; - } - --static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); -+static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); - - static bool type_is_integer(const struct hlsl_type *type) - { -@@ -2519,7 +2578,7 @@ static bool type_is_integer(const struct hlsl_type *type) - } - - bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) -+ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) - { - unsigned int i; - -@@ -2529,24 +2588,24 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - bool output; - enum vkd3d_shader_type shader_type; - enum vkd3d_sm4_swizzle_type swizzle_type; -- enum vkd3d_sm4_register_type type; -+ enum vkd3d_shader_register_type type; - bool has_idx; - } - register_table[] = - { -- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, -- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, -- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, -+ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADID, false}, -+ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADGROUPID, false}, -+ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_LOCALTHREADID, false}, - -- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3DSPR_PRIMID, false}, - - /* Put sv_target in 
this table, instead of letting it fall through to - * default varying allocation, so that the register index matches the - * usage index. */ -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) -@@ -2555,7 +2614,8 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type) - { -- *type = register_table[i].type; -+ if (type) -+ *type = register_table[i].type; - if (swizzle_type) - *swizzle_type = register_table[i].swizzle_type; - *has_idx = register_table[i].has_idx; -@@ -2656,7 +2716,6 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; -- enum vkd3d_sm4_register_type type; - uint32_t usage_idx, reg_idx; - D3D_NAME usage; - bool has_idx; -@@ -2670,14 +2729,13 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - continue; - usage_idx = var->semantic.index; - -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) -+ if 
(hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, NULL, &has_idx)) - { - reg_idx = has_idx ? var->semantic.index : ~0u; - } - else - { - assert(var->regs[HLSL_REGSET_NUMERIC].allocated); -- type = VKD3D_SM4_RT_INPUT; - reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; - } - -@@ -3061,7 +3119,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - regset = hlsl_type_get_regset(component_type); - regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, regset, k); - -- if (regset_offset > var->regs[regset].bind_count) -+ if (regset_offset > var->regs[regset].allocation_size) - continue; - - if (var->objects_usage[regset][regset_offset].used) -@@ -3134,7 +3192,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - - extern_resources[*count].regset = regset; - extern_resources[*count].id = var->regs[regset].id; -- extern_resources[*count].bind_count = var->regs[regset].bind_count; -+ extern_resources[*count].bind_count = var->bind_count[regset]; - - ++*count; - } -@@ -3435,8 +3493,8 @@ static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_mod - - struct sm4_register - { -- enum vkd3d_sm4_register_type type; -- uint32_t idx[2]; -+ enum vkd3d_shader_register_type type; -+ struct vkd3d_shader_register_index idx[2]; - unsigned int idx_count; - enum vkd3d_sm4_dimension dim; - uint32_t immconst_uint[4]; -@@ -3484,36 +3542,36 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - - if (regset == HLSL_REGSET_TEXTURES) - { -- reg->type = VKD3D_SM4_RT_RESOURCE; -+ reg->type = VKD3DSPR_RESOURCE; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; -- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - 
assert(regset == HLSL_REGSET_TEXTURES); - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_UAVS) - { -- reg->type = VKD3D_SM5_RT_UAV; -+ reg->type = VKD3DSPR_UAV; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; -- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - assert(regset == HLSL_REGSET_UAVS); - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_SAMPLERS) - { -- reg->type = VKD3D_SM4_RT_SAMPLER; -+ reg->type = VKD3DSPR_SAMPLER; - reg->dim = VKD3D_SM4_DIMENSION_NONE; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -- reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; -- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - assert(regset == HLSL_REGSET_SAMPLERS); - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; -@@ -3523,12 +3581,12 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; - - assert(data_type->class <= HLSL_CLASS_VECTOR); -- reg->type = VKD3D_SM4_RT_CONSTBUFFER; -+ reg->type = VKD3DSPR_CONSTBUFFER; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = var->buffer->reg.id; -- reg->idx[1] = offset / 4; -+ reg->idx[0].offset = var->buffer->reg.id; -+ reg->idx[1].offset = offset / 4; - reg->idx_count = 2; - *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); - } -@@ -3543,7 +3601,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - - if (has_idx) - { -- reg->idx[0] = 
var->semantic.index + offset / 4; -+ reg->idx[0].offset = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - -@@ -3555,11 +3613,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); -- reg->type = VKD3D_SM4_RT_INPUT; -+ reg->type = VKD3DSPR_INPUT; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = hlsl_reg.id; -+ reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } -@@ -3574,11 +3632,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - - if (has_idx) - { -- reg->idx[0] = var->semantic.index + offset / 4; -+ reg->idx[0].offset = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - -- if (reg->type == VKD3D_SM4_RT_DEPTHOUT) -+ if (reg->type == VKD3DSPR_DEPTHOUT) - reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - else - reg->dim = VKD3D_SM4_DIMENSION_VEC4; -@@ -3589,9 +3647,9 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); -- reg->type = VKD3D_SM4_RT_OUTPUT; -+ reg->type = VKD3DSPR_OUTPUT; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- reg->idx[0] = hlsl_reg.id; -+ reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } -@@ -3601,11 +3659,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); -- reg->type = VKD3D_SM4_RT_TEMP; -+ reg->type = VKD3DSPR_TEMP; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = hlsl_reg.id; -+ reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } -@@ -3625,10 +3683,10 @@ 
static void sm4_register_from_node(struct sm4_register *reg, unsigned int *write - enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) - { - assert(instr->reg.allocated); -- reg->type = VKD3D_SM4_RT_TEMP; -+ reg->type = VKD3DSPR_TEMP; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = instr->reg.id; -+ reg->idx[0].offset = instr->reg.id; - reg->idx_count = 1; - *writemask = instr->reg.writemask; - } -@@ -3644,7 +3702,7 @@ static void sm4_src_from_constant_value(struct sm4_src_register *src, - const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) - { - src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -- src->reg.type = VKD3D_SM4_RT_IMMCONST; -+ src->reg.type = VKD3DSPR_IMMCONST; - if (width == 1) - { - src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR; -@@ -3681,17 +3739,100 @@ static void sm4_src_from_node(struct sm4_src_register *src, - src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); - } - --static uint32_t sm4_encode_register(const struct sm4_register *reg) -+static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct sm4_dst_register *dst) -+{ -+ const struct vkd3d_sm4_register_type_info *register_type_info; -+ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; -+ uint32_t sm4_reg_type, reg_dim; -+ uint32_t token = 0; -+ unsigned int j; -+ -+ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, dst->reg.type); -+ if (!register_type_info) -+ { -+ FIXME("Unhandled vkd3d-shader register type %#x.\n", dst->reg.type); -+ sm4_reg_type = VKD3D_SM4_RT_TEMP; -+ } -+ else -+ { -+ sm4_reg_type = register_type_info->sm4_type; -+ } -+ -+ reg_dim = dst->reg.dim; -+ -+ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; -+ token |= dst->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; -+ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; -+ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) -+ token |= dst->writemask << 
VKD3D_SM4_WRITEMASK_SHIFT; -+ put_u32(buffer, token); -+ -+ for (j = 0; j < dst->reg.idx_count; ++j) -+ { -+ put_u32(buffer, dst->reg.idx[j].offset); -+ assert(!dst->reg.idx[j].rel_addr); -+ } -+} -+ -+static void sm4_write_src_register(const struct tpf_writer *tpf, const struct sm4_src_register *src) - { -- return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) -- | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) -- | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); -+ const struct vkd3d_sm4_register_type_info *register_type_info; -+ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; -+ uint32_t sm4_reg_type, reg_dim; -+ uint32_t token = 0; -+ unsigned int j; -+ -+ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, src->reg.type); -+ if (!register_type_info) -+ { -+ FIXME("Unhandled vkd3d-shader register type %#x.\n", src->reg.type); -+ sm4_reg_type = VKD3D_SM4_RT_TEMP; -+ } -+ else -+ { -+ sm4_reg_type = register_type_info->sm4_type; -+ } -+ -+ reg_dim = src->reg.dim; -+ -+ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; -+ token |= src->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; -+ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; -+ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) -+ { -+ token |= (uint32_t)src->swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -+ token |= src->swizzle << VKD3D_SM4_SWIZZLE_SHIFT; -+ } -+ if (src->reg.mod) -+ token |= VKD3D_SM4_EXTENDED_OPERAND; -+ put_u32(buffer, token); -+ -+ if (src->reg.mod) -+ put_u32(buffer, (src->reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) -+ | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); -+ -+ for (j = 0; j < src->reg.idx_count; ++j) -+ { -+ put_u32(buffer, src->reg.idx[j].offset); -+ assert(!src->reg.idx[j].rel_addr); -+ } -+ -+ if (src->reg.type == VKD3DSPR_IMMCONST) -+ { -+ put_u32(buffer, src->reg.immconst_uint[0]); -+ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) -+ { -+ put_u32(buffer, src->reg.immconst_uint[1]); -+ put_u32(buffer, src->reg.immconst_uint[2]); -+ put_u32(buffer, 
src->reg.immconst_uint[3]); -+ } -+ } - } - - static uint32_t sm4_register_order(const struct sm4_register *reg) - { - uint32_t order = 1; -- if (reg->type == VKD3D_SM4_RT_IMMCONST) -+ if (reg->type == VKD3DSPR_IMMCONST) - order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 4 : 1; - order += reg->idx_count; - if (reg->mod) -@@ -3699,8 +3840,9 @@ static uint32_t sm4_register_order(const struct sm4_register *reg) - return order; - } - --static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) -+static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) - { -+ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - uint32_t token = instr->opcode; - unsigned int size = 1, i, j; - -@@ -3728,43 +3870,10 @@ static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const st - } - - for (i = 0; i < instr->dst_count; ++i) -- { -- token = sm4_encode_register(&instr->dsts[i].reg); -- if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) -- token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; -- put_u32(buffer, token); -- -- for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) -- put_u32(buffer, instr->dsts[i].reg.idx[j]); -- } -+ sm4_write_dst_register(tpf, &instr->dsts[i]); - - for (i = 0; i < instr->src_count; ++i) -- { -- token = sm4_encode_register(&instr->srcs[i].reg); -- token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -- token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; -- if (instr->srcs[i].reg.mod) -- token |= VKD3D_SM4_EXTENDED_OPERAND; -- put_u32(buffer, token); -- -- if (instr->srcs[i].reg.mod) -- put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) -- | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); -- -- for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) -- put_u32(buffer, instr->srcs[i].reg.idx[j]); -- -- if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) -- { -- put_u32(buffer, 
instr->srcs[i].reg.immconst_uint[0]); -- if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) -- { -- put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); -- put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); -- put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); -- } -- } -- } -+ sm4_write_src_register(tpf, &instr->srcs[i]); - - if (instr->byte_stride) - put_u32(buffer, instr->byte_stride); -@@ -3800,25 +3909,25 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - return true; - } - --static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) -+static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) - { - const struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, - - .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, -- .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, -- .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, -+ .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, -+ .srcs[0].reg.idx[0].offset = cbuffer->reg.id, -+ .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, - .srcs[0].reg.idx_count = 2, - .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, - .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), - .src_count = 1, - }; -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_samplers(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct extern_resource *resource) -+static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) - { - struct hlsl_type *component_type; - unsigned int i; -@@ -3826,12 +3935,12 @@ static void write_sm4_dcl_samplers(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - { - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, - -- .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, -+ .dsts[0].reg.type = VKD3DSPR_SAMPLER, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - }; 
- -- component_type = hlsl_type_get_component_type(ctx, resource->data_type, 0); -+ component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); - - if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) - instr.opcode |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; -@@ -3843,13 +3952,13 @@ static void write_sm4_dcl_samplers(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - continue; - -- instr.dsts[0].reg.idx[0] = resource->id + i; -- write_sm4_instruction(buffer, &instr); -+ instr.dsts[0].reg.idx[0].offset = resource->id + i; -+ write_sm4_instruction(tpf, &instr); - } - } - --static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct extern_resource *resource, bool uav) -+static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, -+ bool uav) - { - enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; - struct hlsl_type *component_type; -@@ -3858,7 +3967,7 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - - assert(resource->regset == regset); - -- component_type = hlsl_type_get_component_type(ctx, resource->data_type, 0); -+ component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); - - for (i = 0; i < resource->bind_count; ++i) - { -@@ -3867,8 +3976,8 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - - instr = (struct sm4_instruction) - { -- .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, -- .dsts[0].reg.idx = {resource->id + i}, -+ .dsts[0].reg.type = uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, -+ .dsts[0].reg.idx[0].offset = resource->id + i, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - -@@ -3901,13 +4010,13 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - } - --static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -+static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) - { -- const struct hlsl_profile_info *profile = ctx->profile; -+ const struct hlsl_profile_info *profile = tpf->ctx->profile; - const bool output = var->is_output_semantic; - D3D_NAME usage; - bool has_idx; -@@ -3918,11 +4027,11 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - .dst_count = 1, - }; - -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) -+ if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) - { - if (has_idx) - { -- instr.dsts[0].reg.idx[0] = var->semantic.index; -+ instr.dsts[0].reg.idx[0].offset = var->semantic.index; - instr.dsts[0].reg.idx_count = 1; - } - else -@@ -3933,16 +4042,16 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - } - else - { -- instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; -- instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; -+ instr.dsts[0].reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; -+ instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; - instr.dsts[0].reg.idx_count = 1; - instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; - } - -- if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) -+ if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) - instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; - -- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -+ hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); - if (usage == ~0u) - usage = D3D_NAME_UNDEFINED; - -@@ -4002,10 +4111,10 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - break; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) -+static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) - { - struct sm4_instruction instr = - { -@@ -4015,33 +4124,35 @@ static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t t - .idx_count = 1, - }; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) -+static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) - { - struct sm4_instruction instr = - { - .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, - -- .idx = {thread_count[0], thread_count[1], thread_count[2]}, -+ .idx[0] = thread_count[0], -+ .idx[1] = thread_count[1], -+ .idx[2] = thread_count[2], - .idx_count = 3, - }; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) -+static void write_sm4_ret(const struct tpf_writer *tpf) - { - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_RET, - }; - -- 
write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) - { - struct sm4_instruction instr; -@@ -4056,12 +4167,11 @@ static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_ - instr.srcs[0].reg.mod = src_mod; - instr.src_count = 1; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, -- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, -- const struct hlsl_ir_node *src) -+static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct hlsl_ir_node *dst, unsigned dst_idx, const struct hlsl_ir_node *src) - { - struct sm4_instruction instr; - -@@ -4071,7 +4181,7 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe - assert(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - assert(1 - dst_idx >= 0); -- instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; -+ instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; - instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; -@@ -4079,10 +4189,10 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe - sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); - instr.src_count = 1; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, -+static void 
write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { - struct sm4_instruction instr; -@@ -4097,11 +4207,11 @@ static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d - sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); - instr.src_count = 2; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - - /* dp# instructions don't map the swizzle. */ --static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { - struct sm4_instruction instr; -@@ -4116,10 +4226,10 @@ static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum v - sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, -+static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, - const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { -@@ -4131,7 +4241,7 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff - assert(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - assert(1 - dst_idx >= 0); -- instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; -+ instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; - instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; -@@ -4140,15 +4250,15 @@ 
static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff - sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); - instr.src_count = 2; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, -- const struct hlsl_ir_node *coords, const struct hlsl_ir_node *sample_index, -- const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) -+static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, -+ const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, -+ const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, -+ enum hlsl_sampler_dim dim) - { -- const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, resource); -+ const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); - bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE - && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); - bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); -@@ -4165,7 +4275,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { -- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } -@@ -4188,7 +4298,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - - sm4_src_from_node(&instr.srcs[0], coords, coords_writemask); - -- sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, 
&instr.srcs[1], resource, instr.dsts[0].writemask); - - instr.src_count = 2; - -@@ -4203,13 +4313,13 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - - memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); - instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -- reg->type = VKD3D_SM4_RT_IMMCONST; -+ reg->type = VKD3DSPR_IMMCONST; - reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - reg->immconst_uint[0] = index->value.u[0].u; - } -- else if (ctx->profile->major_version == 4 && ctx->profile->minor_version == 0) -+ else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) - { -- hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); -+ hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); - } - else - { -@@ -4219,11 +4329,10 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - ++instr.src_count; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_resource_load *load) -+static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *coords = load->coords.node; -@@ -4267,7 +4376,7 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { -- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } -@@ -4277,8 +4386,8 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); -- sm4_src_from_deref(ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 3; - - if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD -@@ -4300,11 +4409,10 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - ++instr.src_count; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_sampleinfo(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_resource_load *load) -+static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_ir_node *dst = &load->node; -@@ -4320,14 +4428,13 @@ static void write_sm4_sampleinfo(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buf - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - -- sm4_src_from_deref(ctx, &instr.srcs[0], resource, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], resource, instr.dsts[0].writemask); - instr.src_count = 1; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_resinfo(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_resource_load *load) -+static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_ir_node *dst = &load->node; -@@ -4344,10 +4451,10 @@ static void write_sm4_resinfo(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - 
instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); - instr.src_count = 2; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - - static bool type_is_float(const struct hlsl_type *type) -@@ -4355,8 +4462,7 @@ static bool type_is_float(const struct hlsl_type *type) - return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; - } - --static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, -+static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, - const struct hlsl_ir_node *arg, uint32_t mask) - { - struct sm4_instruction instr; -@@ -4369,16 +4475,15 @@ static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, - - sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); - instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -- instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; -+ instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; - instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; - instr.srcs[1].reg.immconst_uint[0] = mask; - instr.src_count = 2; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_cast(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) -+static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) - { - static const union - { -@@ -4400,23 +4505,23 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: -- 
write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); -+ write_sm4_cast_from_bool(tpf, expr, arg1, one.u); - break; - - case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); - break; - - default: -@@ -4429,20 +4534,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); -+ write_sm4_cast_from_bool(tpf, expr, arg1, 1); - break; - - case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); - break; - - default: -@@ -4455,20 +4560,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(ctx, 
buffer, expr, arg1, 1); -+ write_sm4_cast_from_bool(tpf, expr, arg1, 1); - break; - - case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); - break; - - default: -@@ -4477,7 +4582,7 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, - break; - - case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double."); - break; - - case HLSL_TYPE_BOOL: -@@ -4487,26 +4592,25 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, - } - } - --static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) -+static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, -+ const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) - { - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; - -- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); -+ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_expr(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) -+static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) - { - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_ir_node *arg2 = expr->operands[1].node; -@@ -4515,7 +4619,7 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - - 
assert(expr->node.reg.allocated); - -- if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) -+ if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) - return; - - switch (expr->op) -@@ -4524,181 +4628,181 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP1_BIT_NOT: - assert(type_is_integer(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_CAST: -- write_sm4_cast(ctx, buffer, expr); -+ write_sm4_cast(tpf, expr); - break; - - case HLSL_OP1_COS: - assert(type_is_float(dst_type)); -- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); -+ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); - break; - - case HLSL_OP1_DSX: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSX_COARSE: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSX_FINE: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, 
&expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY_COARSE: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY_FINE: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_EXP2: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FLOOR: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FRACT: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOG2: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOGIC_NOT: - assert(dst_type->base_type == HLSL_TYPE_BOOL); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_NEG: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); -+ 
write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP1_REINTERPRET: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_OP1_ROUND: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_RSQ: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); - break; - - case HLSL_OP1_SAT: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV - | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), - &expr->node, arg1, 0); - break; - - case HLSL_OP1_SIN: - assert(type_is_float(dst_type)); -- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); -+ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); - break; - - case HLSL_OP1_SQRT: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_TRUNC: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); -+ 
write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); - break; - - case HLSL_OP2_ADD: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_BIT_AND: - assert(type_is_integer(dst_type)); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_OR: - assert(type_is_integer(dst_type)); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_XOR: - assert(type_is_integer(dst_type)); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_DIV: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: -- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); -+ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, 
&expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); - } - break; - -@@ -4709,15 +4813,15 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (arg1->data_type->dimx) - { - case 4: -- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); -+ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); - break; - - case 3: -- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); -+ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); - break; - - case 2: -- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); -+ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); - break; - - case 1: -@@ -4727,7 +4831,7 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); - } - break; - -@@ -4740,18 +4844,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", -- debug_hlsl_type(ctx, src_type)); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; -@@ -4766,21 +4870,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, 
&expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", -- debug_hlsl_type(ctx, src_type)); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; -@@ -4795,21 +4899,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", -- debug_hlsl_type(ctx, src_type)); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; -@@ -4817,37 +4921,37 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - - case HLSL_OP2_LOGIC_AND: - assert(dst_type->base_type == HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, 
VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LOGIC_OR: - assert(dst_type->base_type == HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LSHIFT: - assert(type_is_integer(dst_type)); - assert(dst_type->base_type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_MAX: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); - } - break; - -@@ -4855,19 +4959,19 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, 
&expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); - } - break; - -@@ -4875,11 +4979,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (dst_type->base_type) - { - case HLSL_TYPE_UINT: -- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); -+ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); - } - break; - -@@ -4887,18 +4991,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - /* Using IMUL instead of UMUL because we're taking the low - * bits, and the native compiler generates IMUL. 
*/ -- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); -+ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); - } - break; - -@@ -4911,18 +5015,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", -- debug_hlsl_type(ctx, src_type)); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; -@@ -4931,18 +5035,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - case HLSL_OP2_RSHIFT: - assert(type_is_integer(dst_type)); - assert(dst_type->base_type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, -+ write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, - &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); - } - -- hlsl_release_string_buffer(ctx, dst_type_string); -+ hlsl_release_string_buffer(tpf->ctx, dst_type_string); - } - --static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) -+static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) - { - struct sm4_instruction instr = - { -@@ -4953,26 +5057,25 @@ static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - assert(iff->condition.node->data_type->dimx == 1); - - sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - -- write_sm4_block(ctx, buffer, &iff->then_block); -+ write_sm4_block(tpf, &iff->then_block); - - if (!list_empty(&iff->else_block.instrs)) - { - instr.opcode = VKD3D_SM4_OP_ELSE; - instr.src_count = 0; -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - -- write_sm4_block(ctx, buffer, &iff->else_block); -+ write_sm4_block(tpf, &iff->else_block); - } - - instr.opcode = VKD3D_SM4_OP_ENDIF; - instr.src_count = 0; -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_jump(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) -+static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) - { - struct sm4_instruction instr = {0}; - -@@ -4996,11 +5099,11 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, - vkd3d_unreachable(); - - default: -- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); -+ hlsl_fixme(tpf->ctx, 
&jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); - return; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - - /* Does this variable's data come directly from the API user, rather than being -@@ -5014,8 +5117,7 @@ static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *va - return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; - } - --static void write_sm4_load(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) -+static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) - { - const struct hlsl_type *type = load->node.data_type; - struct sm4_instruction instr; -@@ -5026,7 +5128,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, - instr.dst_count = 1; - - assert(type->class <= HLSL_CLASS_LAST_NUMERIC); -- if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(ctx, load->src.var)) -+ if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) - { - struct hlsl_constant_value value; - -@@ -5035,7 +5137,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, - - instr.opcode = VKD3D_SM4_OP_MOVC; - -- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); - - memset(&value, 0xff, sizeof(value)); - sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); -@@ -5047,32 +5149,31 @@ static void write_sm4_load(struct hlsl_ctx *ctx, - { - instr.opcode = VKD3D_SM4_OP_MOV; - -- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); - instr.src_count = 1; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_loop(struct hlsl_ctx *ctx, -- struct 
vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) -+static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) - { - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_LOOP, - }; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - -- write_sm4_block(ctx, buffer, &loop->body); -+ write_sm4_block(tpf, &loop->body); - - instr.opcode = VKD3D_SM4_OP_ENDLOOP; -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_deref *sampler, -- const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) -+static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, -+ const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, -+ unsigned int swizzle, const struct hlsl_ir_node *texel_offset) - { - struct sm4_src_register *src; - struct sm4_instruction instr; -@@ -5090,9 +5191,9 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { -- if (ctx->profile->major_version < 5) -+ if (tpf->ctx->profile->major_version < 5) - { -- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); - return; - } -@@ -5101,19 +5202,18 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - } - } - -- sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[instr.src_count++], resource, 
instr.dsts[0].writemask); - - src = &instr.srcs[instr.src_count++]; -- sm4_src_from_deref(ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(tpf->ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); - src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; - src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; - src->swizzle = swizzle; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_resource_load(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) -+static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *sample_index = load->sample_index.node; -@@ -5121,20 +5221,20 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, - - if (load->sampler.var && !load->sampler.var->is_uniform) - { -- hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); -+ hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); - return; - } - - if (!load->resource.var->is_uniform) - { -- hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); -+ hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); - return; - } - - switch (load->load_type) - { - case HLSL_RESOURCE_LOAD: -- write_sm4_ld(ctx, buffer, &load->node, &load->resource, -+ write_sm4_ld(tpf, &load->node, &load->resource, - coords, sample_index, texel_offset, load->sampling_dim); - break; - -@@ -5146,61 +5246,59 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, - case HLSL_RESOURCE_SAMPLE_GRAD: - /* Combined sample expressions were lowered. 
*/ - assert(load->sampler.var); -- write_sm4_sample(ctx, buffer, load); -+ write_sm4_sample(tpf, load); - break; - - case HLSL_RESOURCE_GATHER_RED: -- write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - HLSL_SWIZZLE(X, X, X, X), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_GREEN: -- write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_BLUE: -- write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_ALPHA: -- write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, - HLSL_SWIZZLE(W, W, W, W), texel_offset); - break; - - case HLSL_RESOURCE_SAMPLE_INFO: -- write_sm4_sampleinfo(ctx, buffer, load); -+ write_sm4_sampleinfo(tpf, load); - break; - - case HLSL_RESOURCE_RESINFO: -- write_sm4_resinfo(ctx, buffer, load); -+ write_sm4_resinfo(tpf, load); - break; - } - } - --static void write_sm4_resource_store(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) -+static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) - { -- struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); -+ struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); - - if (!store->resource.var->is_uniform) - { -- hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); -+ hlsl_fixme(tpf->ctx, &store->node.loc, "Store to 
non-uniform resource variable."); - return; - } - - if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { -- hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); -+ hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); - return; - } - -- write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); -+ write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); - } - --static void write_sm4_store(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) -+static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) - { - const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm4_instruction instr; -@@ -5209,18 +5307,17 @@ static void write_sm4_store(struct hlsl_ctx *ctx, - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - -- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); -+ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); - instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); - instr.src_count = 1; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_swizzle(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) -+static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) - { - struct sm4_instruction instr; - unsigned int writemask; -@@ -5236,11 +5333,10 @@ static void write_sm4_swizzle(struct hlsl_ctx *ctx, - swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); - instr.src_count = 1; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - 
--static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_block *block) -+static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) - { - const struct hlsl_ir_node *instr; - -@@ -5250,12 +5346,12 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - { - if (instr->data_type->class == HLSL_CLASS_MATRIX) - { -- hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); -+ hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); - break; - } - else if (instr->data_type->class == HLSL_CLASS_OBJECT) - { -- hlsl_fixme(ctx, &instr->loc, "Object copy."); -+ hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); - break; - } - -@@ -5275,43 +5371,43 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - vkd3d_unreachable(); - - case HLSL_IR_EXPR: -- write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); -+ write_sm4_expr(tpf, hlsl_ir_expr(instr)); - break; - - case HLSL_IR_IF: -- write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); -+ write_sm4_if(tpf, hlsl_ir_if(instr)); - break; - - case HLSL_IR_JUMP: -- write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); -+ write_sm4_jump(tpf, hlsl_ir_jump(instr)); - break; - - case HLSL_IR_LOAD: -- write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); -+ write_sm4_load(tpf, hlsl_ir_load(instr)); - break; - - case HLSL_IR_RESOURCE_LOAD: -- write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); -+ write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); - break; - - case HLSL_IR_RESOURCE_STORE: -- write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); -+ write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); - break; - - case HLSL_IR_LOOP: -- write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); -+ write_sm4_loop(tpf, hlsl_ir_loop(instr)); - break; - - case HLSL_IR_STORE: -- write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); -+ write_sm4_store(tpf, 
hlsl_ir_store(instr)); - break; - - case HLSL_IR_SWIZZLE: -- write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); -+ write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); - break; - - default: -- hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); -+ hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } - } -@@ -5326,6 +5422,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; - size_t token_count_position; -+ struct tpf_writer tpf; - - static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = - { -@@ -5340,6 +5437,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - VKD3D_SM4_LIB, - }; - -+ tpf_writer_init(&tpf, ctx, &buffer); -+ - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - - put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); -@@ -5348,7 +5447,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -- write_sm4_dcl_constant_buffer(&buffer, cbuffer); -+ write_sm4_dcl_constant_buffer(&tpf, cbuffer); - } - - for (i = 0; i < extern_resources_count; ++i) -@@ -5356,28 +5455,28 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - const struct extern_resource *resource = &extern_resources[i]; - - if (resource->regset == HLSL_REGSET_SAMPLERS) -- write_sm4_dcl_samplers(ctx, &buffer, resource); -+ write_sm4_dcl_samplers(&tpf, resource); - else if (resource->regset == HLSL_REGSET_TEXTURES) -- write_sm4_dcl_textures(ctx, &buffer, resource, false); -+ write_sm4_dcl_textures(&tpf, resource, false); - else if (resource->regset == HLSL_REGSET_UAVS) -- write_sm4_dcl_textures(ctx, &buffer, resource, true); -+ write_sm4_dcl_textures(&tpf, resource, true); - } - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct 
hlsl_ir_var, extern_entry) - { - if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) -- write_sm4_dcl_semantic(ctx, &buffer, var); -+ write_sm4_dcl_semantic(&tpf, var); - } - - if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) -- write_sm4_dcl_thread_group(&buffer, ctx->thread_count); -+ write_sm4_dcl_thread_group(&tpf, ctx->thread_count); - - if (ctx->temp_count) -- write_sm4_dcl_temps(&buffer, ctx->temp_count); -+ write_sm4_dcl_temps(&tpf, ctx->temp_count); - -- write_sm4_block(ctx, &buffer, &entry_func->body); -+ write_sm4_block(&tpf, &entry_func->body); - -- write_sm4_ret(&buffer); -+ write_sm4_ret(&tpf); - - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 512d9ea41e7..c777bad2206 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -579,7 +579,7 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig - - struct vkd3d_shader_scan_context - { -- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -+ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; - size_t descriptors_size; - - struct vkd3d_shader_message_context *message_context; -@@ -599,20 +599,12 @@ struct vkd3d_shader_scan_context - size_t cf_info_size; - size_t cf_info_count; - -- struct -- { -- unsigned int id; -- unsigned int descriptor_idx; -- } *uav_ranges; -- size_t uav_ranges_size; -- size_t uav_range_count; -- - enum vkd3d_shader_api_version api_version; - }; - - static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, -+ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - struct vkd3d_shader_message_context 
*message_context) - { - unsigned int i; -@@ -635,7 +627,6 @@ static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *con - - static void vkd3d_shader_scan_context_cleanup(struct vkd3d_shader_scan_context *context) - { -- vkd3d_free(context->uav_ranges); - vkd3d_free(context->cf_info); - } - -@@ -703,18 +694,23 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_find_innermost_loop_cf_inf - return NULL; - } - --static struct vkd3d_shader_descriptor_info *vkd3d_shader_scan_get_uav_descriptor_info( -- const struct vkd3d_shader_scan_context *context, unsigned int range_id) -+static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_context *context, -+ const struct vkd3d_shader_register *reg, uint32_t flag) - { -+ unsigned int range_id = reg->idx[0].offset; - unsigned int i; - -- for (i = 0; i < context->uav_range_count; ++i) -+ if (!context->scan_descriptor_info) -+ return; -+ -+ for (i = 0; i < context->scan_descriptor_info->descriptor_count; ++i) - { -- if (context->uav_ranges[i].id == range_id) -- return &context->scan_descriptor_info->descriptors[context->uav_ranges[i].descriptor_idx]; -+ if (context->scan_descriptor_info->descriptors[i].register_id == range_id) -+ { -+ context->scan_descriptor_info->descriptors[i].flags |= flag; -+ break; -+ } - } -- -- return NULL; - } - - static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) -@@ -730,13 +726,7 @@ static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instr - static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) - { -- struct vkd3d_shader_descriptor_info *d; -- -- if (!context->scan_descriptor_info) -- return; -- -- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); -- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ; -+ vkd3d_shader_scan_add_uav_flag(context, reg, 
VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); - } - - static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) -@@ -749,13 +739,7 @@ static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_in - static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) - { -- struct vkd3d_shader_descriptor_info *d; -- -- if (!context->scan_descriptor_info) -- return; -- -- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); -- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER; -+ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER); - } - - static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) -@@ -768,22 +752,16 @@ static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_ - static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) - { -- struct vkd3d_shader_descriptor_info *d; -- -- if (!context->scan_descriptor_info) -- return; -- -- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); -- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS; -+ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS); - } - - static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, -- enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register_range *range, -- enum vkd3d_shader_resource_type resource_type, enum vkd3d_shader_resource_data_type resource_data_type, -- unsigned int flags) -+ enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register *reg, -+ const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, -+ enum vkd3d_shader_resource_data_type resource_data_type, 
unsigned int flags) - { -- struct vkd3d_shader_scan_descriptor_info *info = context->scan_descriptor_info; -- struct vkd3d_shader_descriptor_info *d; -+ struct vkd3d_shader_scan_descriptor_info1 *info = context->scan_descriptor_info; -+ struct vkd3d_shader_descriptor_info1 *d; - - if (!vkd3d_array_reserve((void **)&info->descriptors, &context->descriptors_size, - info->descriptor_count + 1, sizeof(*info->descriptors))) -@@ -794,6 +772,7 @@ static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *c - - d = &info->descriptors[info->descriptor_count]; - d->type = type; -+ d->register_id = reg->idx[0].offset; - d->register_space = range->space; - d->register_index = range->first; - d->resource_type = resource_type; -@@ -805,23 +784,6 @@ static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *c - return true; - } - --static bool vkd3d_shader_scan_add_uav_range(struct vkd3d_shader_scan_context *context, -- unsigned int id, unsigned int descriptor_idx) --{ -- if (!vkd3d_array_reserve((void **)&context->uav_ranges, &context->uav_ranges_size, -- context->uav_range_count + 1, sizeof(*context->uav_ranges))) -- { -- ERR("Failed to allocate UAV range.\n"); -- return false; -- } -- -- context->uav_ranges[context->uav_range_count].id = id; -- context->uav_ranges[context->uav_range_count].descriptor_idx = descriptor_idx; -- ++context->uav_range_count; -- -- return true; --} -- - static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_instruction *instruction) - { -@@ -830,7 +792,7 @@ static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_sc - if (!context->scan_descriptor_info) - return; - -- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->range, -+ vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->src.reg, &cb->range, - VKD3D_SHADER_RESOURCE_BUFFER, 
VKD3D_SHADER_RESOURCE_DATA_UINT, 0); - } - -@@ -847,7 +809,7 @@ static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_conte - flags = VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; - else - flags = 0; -- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->range, -+ vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->src.reg, &sampler->range, - VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT, flags); - } - -@@ -864,10 +826,8 @@ static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_cont - type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; - else - type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; -- vkd3d_shader_scan_add_descriptor(context, type, &resource->range, resource_type, resource_data_type, 0); -- if (type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -- vkd3d_shader_scan_add_uav_range(context, resource->reg.reg.idx[0].offset, -- context->scan_descriptor_info->descriptor_count - 1); -+ vkd3d_shader_scan_add_descriptor(context, type, &resource->reg.reg, &resource->range, -+ resource_type, resource_data_type, 0); - } - - static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_scan_context *context, -@@ -1117,24 +1077,64 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte - return VKD3D_OK; - } - -+static enum vkd3d_result convert_descriptor_info(struct vkd3d_shader_scan_descriptor_info *info, -+ const struct vkd3d_shader_scan_descriptor_info1 *info1) -+{ -+ unsigned int i; -+ -+ if (!(info->descriptors = vkd3d_calloc(info1->descriptor_count, sizeof(*info->descriptors)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ for (i = 0; i < info1->descriptor_count; ++i) -+ { -+ const struct vkd3d_shader_descriptor_info1 *src = &info1->descriptors[i]; -+ struct vkd3d_shader_descriptor_info *dst = &info->descriptors[i]; -+ -+ dst->type = src->type; -+ dst->register_space = src->register_space; -+ 
dst->register_index = src->register_index; -+ dst->resource_type = src->resource_type; -+ dst->resource_data_type = src->resource_data_type; -+ dst->flags = src->flags; -+ dst->count = src->count; -+ } -+ info->descriptor_count = info1->descriptor_count; -+ -+ return VKD3D_OK; -+} -+ -+static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) -+{ -+ TRACE("scan_descriptor_info %p.\n", scan_descriptor_info); -+ -+ vkd3d_free(scan_descriptor_info->descriptors); -+} -+ - static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) -+ struct vkd3d_shader_message_context *message_context, -+ struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) - { -- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -+ struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; -+ struct vkd3d_shader_scan_descriptor_info *descriptor_info; - struct vkd3d_shader_scan_signature_info *signature_info; - struct vkd3d_shader_instruction *instruction; - struct vkd3d_shader_scan_context context; - int ret = VKD3D_OK; - unsigned int i; - -- if ((scan_descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO))) -+ descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO); -+ if (descriptor_info1) -+ { -+ descriptor_info1->descriptors = NULL; -+ descriptor_info1->descriptor_count = 0; -+ } -+ else if (descriptor_info) - { -- scan_descriptor_info->descriptors = NULL; -- scan_descriptor_info->descriptor_count = 0; -+ descriptor_info1 = &local_descriptor_info1; - } - signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO); - -- vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); -+ vkd3d_shader_scan_context_init(&context, compile_info, descriptor_info1, 
message_context); - - if (TRACE_ON()) - { -@@ -1145,19 +1145,16 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - { - instruction = &parser->instructions.elements[i]; - if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) -- { -- if (scan_descriptor_info) -- vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); - break; -- } - } - - for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) - { - struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; -+ struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; - - if (parser->shader_desc.flat_constant_count[i].external) -- vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, -+ vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ®, - &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); - } - -@@ -1169,13 +1166,26 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, - &parser->shader_desc.patch_constant_signature)) - { -- vkd3d_shader_free_scan_signature_info(signature_info); -- if (scan_descriptor_info) -- vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); - ret = VKD3D_ERROR_OUT_OF_MEMORY; - } - } - -+ if (!ret && descriptor_info) -+ ret = convert_descriptor_info(descriptor_info, descriptor_info1); -+ -+ if (ret < 0) -+ { -+ if (descriptor_info) -+ vkd3d_shader_free_scan_descriptor_info(descriptor_info); -+ if (descriptor_info1) -+ vkd3d_shader_free_scan_descriptor_info1(descriptor_info1); -+ if (signature_info) -+ vkd3d_shader_free_scan_signature_info(signature_info); -+ } -+ else -+ { -+ vkd3d_shader_free_scan_descriptor_info1(&local_descriptor_info1); -+ } - vkd3d_shader_scan_context_cleanup(&context); - return ret; - } -@@ -1192,7 +1202,7 @@ static int scan_dxbc(const struct vkd3d_shader_compile_info 
*compile_info, - return ret; - } - -- ret = scan_with_parser(compile_info, message_context, parser); -+ ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - - return ret; -@@ -1210,7 +1220,7 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, - return ret; - } - -- ret = scan_with_parser(compile_info, message_context, parser); -+ ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - - return ret; -@@ -1228,7 +1238,7 @@ static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, - return ret; - } - -- ret = scan_with_parser(compile_info, message_context, parser); -+ ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - - return ret; -@@ -1287,7 +1297,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { -- struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; -+ struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; - struct vkd3d_glsl_generator *glsl_generator; - struct vkd3d_shader_compile_info scan_info; - int ret; -@@ -1295,11 +1305,8 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); - - scan_info = *compile_info; -- scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; -- scan_descriptor_info.next = scan_info.next; -- scan_info.next = &scan_descriptor_info; - -- if ((ret = scan_with_parser(&scan_info, message_context, parser)) < 0) -+ if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) - return ret; - - switch (compile_info->target_type) -@@ -1313,7 +1320,7 @@ static 
int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - message_context, &parser->location))) - { - ERR("Failed to create GLSL generator.\n"); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -+ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - return VKD3D_ERROR; - } - -@@ -1331,7 +1338,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - assert(0); - } - -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -+ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - return ret; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index dc43175d4b5..c719085e11f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -133,10 +133,13 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, - VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, - VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, -+ VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, - - VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, - VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, - VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE = 5302, -+ VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, -+ VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, - - VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, - -@@ -511,6 +514,8 @@ enum vkd3d_shader_register_type - VKD3DSPR_OUTSTENCILREF, - VKD3DSPR_UNDEF, - -+ VKD3DSPR_COUNT, -+ - VKD3DSPR_INVALID = ~0u, - }; - -@@ -1108,6 +1113,24 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse - parser->ops->parser_destroy(parser); - } - -+struct vkd3d_shader_descriptor_info1 -+{ -+ enum vkd3d_shader_descriptor_type type; -+ unsigned int register_space; -+ unsigned int register_index; -+ unsigned int register_id; -+ enum vkd3d_shader_resource_type resource_type; -+ enum 
vkd3d_shader_resource_data_type resource_data_type; -+ unsigned int flags; -+ unsigned int count; -+}; -+ -+struct vkd3d_shader_scan_descriptor_info1 -+{ -+ struct vkd3d_shader_descriptor_info1 *descriptors; -+ unsigned int descriptor_count; -+}; -+ - void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, - const struct vkd3d_shader_version *shader_version); - -@@ -1230,7 +1253,7 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); - #define SPIRV_MAX_SRC_COUNT 6 - - int spirv_compile(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - --- -2.40.1 - diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-a597dc8755af5d2ef4826f1b570927379af.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-a597dc8755af5d2ef4826f1b570927379af.patch new file mode 100644 index 00000000..0dcd01fc --- /dev/null +++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-a597dc8755af5d2ef4826f1b570927379af.patch @@ -0,0 +1,1608 @@ +From 464fcf91e1a7e67b22c83e29d0ea3ec5dceecd35 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Thu, 31 Aug 2023 09:08:26 +1000 +Subject: [PATCH] Updated vkd3d to a597dc8755af5d2ef4826f1b570927379afc5824. 
+ +--- + libs/vkd3d/include/vkd3d_shader.h | 2 + + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 11 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 620 +++++++++++++++++- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 75 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 13 + + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 316 ++++----- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 25 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 2 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 13 +- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 3 - + .../libs/vkd3d-shader/vkd3d_shader_private.h | 10 + + 11 files changed, 848 insertions(+), 242 deletions(-) + +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index cfe54dbff53..d329e205fd1 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -1463,6 +1463,8 @@ enum vkd3d_shader_sysval_semantic + VKD3D_SHADER_SV_TESS_FACTOR_TRIINT = 0x0e, + VKD3D_SHADER_SV_TESS_FACTOR_LINEDET = 0x0f, + VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN = 0x10, ++ /** Render target; SV_Target in Direct3D shader model 6 shaders. 
*/ ++ VKD3D_SHADER_SV_TARGET = 0x40, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SYSVAL_SEMANTIC), + }; +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 99a5bd7a438..2b02d51f59a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1638,17 +1638,12 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + + if (var->is_param && var->is_uniform) + { +- struct vkd3d_string_buffer *name; ++ char *new_name; + +- if (!(name = hlsl_get_string_buffer(ctx))) +- { +- buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; ++ if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name))) + return; +- } +- vkd3d_string_buffer_printf(name, "$%s", var->name); + vkd3d_free((char *)var->name); +- var->name = hlsl_strdup(ctx, name->buffer); +- hlsl_release_string_buffer(ctx, name); ++ var->name = new_name; + } + } + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index f9efe47f95d..666d8b08614 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -22,6 +22,7 @@ + #define VKD3D_SM6_VERSION_MINOR(version) (((version) >> 0) & 0xf) + + #define BITCODE_MAGIC VKD3D_MAKE_TAG('B', 'C', 0xc0, 0xde) ++#define DXIL_OP_MAX_OPERANDS 17 + + enum bitcode_block_id + { +@@ -138,6 +139,11 @@ enum bitcode_value_symtab_code + VST_CODE_BBENTRY = 2, + }; + ++enum dx_intrinsic_opcode ++{ ++ DX_STORE_OUTPUT = 5, ++}; ++ + struct sm6_pointer_info + { + const struct sm6_type *type; +@@ -242,6 +248,8 @@ struct sm6_function + + struct sm6_block *blocks[1]; + size_t block_count; ++ ++ size_t value_count; + }; + + struct dxil_block +@@ -287,12 +295,15 @@ struct sm6_parser + struct sm6_symbol *global_symbols; + size_t global_symbol_count; + ++ struct vkd3d_shader_dst_param *output_params; ++ + struct sm6_function *functions; + size_t function_count; + + struct sm6_value *values; + size_t value_count; + size_t 
value_capacity; ++ size_t cur_max_value; + + struct vkd3d_shader_parser p; + }; +@@ -316,6 +327,8 @@ struct dxil_global_abbrev + struct dxil_abbrev abbrev; + }; + ++static const uint64_t CALL_CONV_FLAG_EXPLICIT_TYPE = 1ull << 15; ++ + static size_t size_add_with_overflow_check(size_t a, size_t b) + { + size_t i = a + b; +@@ -1261,6 +1274,16 @@ static inline bool sm6_type_is_integer(const struct sm6_type *type) + return type->class == TYPE_CLASS_INTEGER; + } + ++static inline bool sm6_type_is_i8(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_INTEGER && type->u.width == 8; ++} ++ ++static inline bool sm6_type_is_i32(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_INTEGER && type->u.width == 32; ++} ++ + static inline bool sm6_type_is_floating_point(const struct sm6_type *type) + { + return type->class == TYPE_CLASS_FLOAT; +@@ -1341,6 +1364,30 @@ static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type + return NULL; + } + ++/* Never returns null for elem_idx 0. 
*/ ++static const struct sm6_type *sm6_type_get_scalar_type(const struct sm6_type *type, unsigned int elem_idx) ++{ ++ switch (type->class) ++ { ++ case TYPE_CLASS_ARRAY: ++ case TYPE_CLASS_VECTOR: ++ if (elem_idx >= type->u.array.count) ++ return NULL; ++ return sm6_type_get_scalar_type(type->u.array.elem_type, 0); ++ ++ case TYPE_CLASS_POINTER: ++ return sm6_type_get_scalar_type(type->u.pointer.type, 0); ++ ++ case TYPE_CLASS_STRUCT: ++ if (elem_idx >= type->u.struc->elem_count) ++ return NULL; ++ return sm6_type_get_scalar_type(type->u.struc->elem_types[elem_idx], 0); ++ ++ default: ++ return type; ++ } ++} ++ + static const struct sm6_type *sm6_parser_get_type(struct sm6_parser *sm6, uint64_t type_id) + { + if (type_id >= sm6->type_count) +@@ -1443,9 +1490,32 @@ static const char *sm6_parser_get_global_symbol_name(const struct sm6_parser *sm + return NULL; + } + ++static unsigned int register_get_uint_value(const struct vkd3d_shader_register *reg) ++{ ++ if (!register_is_constant(reg) || !data_type_is_integer(reg->data_type)) ++ return UINT_MAX; ++ ++ if (reg->immconst_type == VKD3D_IMMCONST_VEC4) ++ WARN("Returning vec4.x.\n"); ++ ++ if (reg->type == VKD3DSPR_IMMCONST64) ++ { ++ if (reg->u.immconst_uint64[0] > UINT_MAX) ++ FIXME("Truncating 64-bit value.\n"); ++ return reg->u.immconst_uint64[0]; ++ } ++ ++ return reg->u.immconst_uint[0]; ++} ++ ++static inline bool sm6_value_is_function_dcl(const struct sm6_value *value) ++{ ++ return value->value_type == VALUE_TYPE_FUNCTION; ++} ++ + static inline bool sm6_value_is_dx_intrinsic_dcl(const struct sm6_value *fn) + { +- assert(fn->value_type == VALUE_TYPE_FUNCTION); ++ assert(sm6_value_is_function_dcl(fn)); + return fn->u.function.is_prototype && !strncmp(fn->u.function.name, "dx.op.", 6); + } + +@@ -1455,6 +1525,60 @@ static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_pa + return &sm6->values[sm6->value_count]; + } + ++static inline bool sm6_value_is_register(const struct sm6_value 
*value) ++{ ++ return value->value_type == VALUE_TYPE_REG; ++} ++ ++static inline bool sm6_value_is_constant(const struct sm6_value *value) ++{ ++ return sm6_value_is_register(value) && register_is_constant(&value->u.reg); ++} ++ ++static inline bool sm6_value_is_undef(const struct sm6_value *value) ++{ ++ return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; ++} ++ ++static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *value) ++{ ++ if (!sm6_value_is_constant(value)) ++ return UINT_MAX; ++ return register_get_uint_value(&value->u.reg); ++} ++ ++static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_shader_instruction *ins, ++ unsigned int count, struct sm6_parser *sm6) ++{ ++ struct vkd3d_shader_src_param *params = shader_parser_get_src_params(&sm6->p, count); ++ if (!params) ++ { ++ ERR("Failed to allocate src params.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory allocating instruction src paramaters."); ++ return NULL; ++ } ++ ins->src = params; ++ ins->src_count = count; ++ return params; ++} ++ ++static struct vkd3d_shader_dst_param *instruction_dst_params_alloc(struct vkd3d_shader_instruction *ins, ++ unsigned int count, struct sm6_parser *sm6) ++{ ++ struct vkd3d_shader_dst_param *params = shader_parser_get_dst_params(&sm6->p, count); ++ if (!params) ++ { ++ ERR("Failed to allocate dst params.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory allocating instruction dst paramaters."); ++ return NULL; ++ } ++ ins->dst = params; ++ ins->dst_count = count; ++ return params; ++} ++ + static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type *type) + { + if (type->class == TYPE_CLASS_INTEGER) +@@ -1488,6 +1612,47 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type + return VKD3D_DATA_UINT; + } + ++static inline void 
dst_param_init_scalar(struct vkd3d_shader_dst_param *param, unsigned int component_idx) ++{ ++ param->write_mask = 1u << component_idx; ++ param->modifiers = 0; ++ param->shift = 0; ++} ++ ++static inline void src_param_init(struct vkd3d_shader_src_param *param) ++{ ++ param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ param->modifiers = VKD3DSPSM_NONE; ++} ++ ++static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src) ++{ ++ src_param_init(param); ++ param->reg = src->u.reg; ++} ++ ++static void register_address_init(struct vkd3d_shader_register *reg, const struct sm6_value *address, ++ unsigned int idx, struct sm6_parser *sm6) ++{ ++ assert(idx < ARRAY_SIZE(reg->idx)); ++ if (sm6_value_is_constant(address)) ++ { ++ reg->idx[idx].offset = sm6_value_get_constant_uint(address); ++ } ++ else if (sm6_value_is_undef(address)) ++ { ++ reg->idx[idx].offset = 0; ++ } ++ else ++ { ++ struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&sm6->p, 1); ++ if (rel_addr) ++ src_param_init_from_value(rel_addr, address); ++ reg->idx[idx].offset = 0; ++ reg->idx[idx].rel_addr = rel_addr; ++ } ++} ++ + /* Recurse through the block tree while maintaining a current value count. The current + * count is the sum of the global count plus all declarations within the current function. + * Store into value_capacity the highest count seen. */ +@@ -1513,6 +1678,7 @@ static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, + * overestimate the value count somewhat, but this should be no problem. */ + value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); + sm6->value_capacity = max(sm6->value_capacity, value_count); ++ sm6->functions[sm6->function_count].value_count = value_count; + /* The value count returns to its previous value after handling a function. 
*/ + if (value_count < SIZE_MAX) + value_count = old_value_count; +@@ -1524,6 +1690,77 @@ static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, + return value_count; + } + ++static size_t sm6_parser_get_value_index(struct sm6_parser *sm6, uint64_t idx) ++{ ++ size_t i; ++ ++ /* The value relative index is 32 bits. */ ++ if (idx > UINT32_MAX) ++ WARN("Ignoring upper 32 bits of relative index.\n"); ++ i = (uint32_t)sm6->value_count - (uint32_t)idx; ++ ++ /* This may underflow to produce a forward reference, but it must not exceeed the final value count. */ ++ if (i >= sm6->cur_max_value) ++ { ++ WARN("Invalid value index %"PRIx64" at %zu.\n", idx, sm6->value_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid value relative index %u.", (unsigned int)idx); ++ return SIZE_MAX; ++ } ++ if (i == sm6->value_count) ++ { ++ WARN("Invalid value self-reference at %zu.\n", sm6->value_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Invalid value self-reference."); ++ return SIZE_MAX; ++ } ++ ++ return i; ++} ++ ++static size_t sm6_parser_get_value_idx_by_ref(struct sm6_parser *sm6, const struct dxil_record *record, ++ const struct sm6_type *fwd_type, unsigned int *rec_idx) ++{ ++ unsigned int idx; ++ uint64_t val_ref; ++ size_t operand; ++ ++ idx = *rec_idx; ++ if (!dxil_record_validate_operand_min_count(record, idx + 1, sm6)) ++ return SIZE_MAX; ++ val_ref = record->operands[idx++]; ++ ++ operand = sm6_parser_get_value_index(sm6, val_ref); ++ if (operand == SIZE_MAX) ++ return SIZE_MAX; ++ ++ if (operand >= sm6->value_count) ++ { ++ if (!fwd_type) ++ { ++ /* Forward references are followed by a type id unless an earlier operand set the type, ++ * or it is contained in a function declaration. 
*/ ++ if (!dxil_record_validate_operand_min_count(record, idx + 1, sm6)) ++ return SIZE_MAX; ++ if (!(fwd_type = sm6_parser_get_type(sm6, record->operands[idx++]))) ++ return SIZE_MAX; ++ } ++ FIXME("Forward value references are not supported yet.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Unsupported value forward reference."); ++ return SIZE_MAX; ++ } ++ *rec_idx = idx; ++ ++ return operand; ++} ++ ++static const struct sm6_value *sm6_parser_get_value_by_ref(struct sm6_parser *sm6, ++ const struct dxil_record *record, const struct sm6_type *type, unsigned int *rec_idx) ++{ ++ size_t operand = sm6_parser_get_value_idx_by_ref(sm6, record, type, rec_idx); ++ return operand == SIZE_MAX ? NULL : &sm6->values[operand]; ++} ++ + static bool sm6_parser_declare_function(struct sm6_parser *sm6, const struct dxil_record *record) + { + const unsigned int max_count = 15; +@@ -1816,6 +2053,81 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) + return VKD3D_OK; + } + ++static void dst_param_io_init(struct vkd3d_shader_dst_param *param, ++ const struct signature_element *e, enum vkd3d_shader_register_type reg_type) ++{ ++ enum vkd3d_shader_component_type component_type; ++ ++ param->write_mask = e->mask; ++ param->modifiers = 0; ++ param->shift = 0; ++ /* DXIL types do not have signedness. Load signed elements as unsigned. */ ++ component_type = e->component_type == VKD3D_SHADER_COMPONENT_INT ? 
VKD3D_SHADER_COMPONENT_UINT : e->component_type; ++ shader_register_init(¶m->reg, reg_type, vkd3d_data_type_from_component_type(component_type), 0); ++} ++ ++static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, ++ enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) ++{ ++ struct vkd3d_shader_dst_param *param; ++ const struct signature_element *e; ++ unsigned int i; ++ ++ for (i = 0; i < s->element_count; ++i) ++ { ++ e = &s->elements[i]; ++ ++ param = ¶ms[i]; ++ dst_param_io_init(param, e, reg_type); ++ param->reg.idx[0].offset = i; ++ param->reg.idx_count = 1; ++ } ++} ++ ++static void sm6_parser_emit_signature(struct sm6_parser *sm6, const struct shader_signature *s, ++ enum vkd3d_shader_opcode handler_idx, enum vkd3d_shader_opcode siv_handler_idx, ++ struct vkd3d_shader_dst_param *params) ++{ ++ struct vkd3d_shader_instruction *ins; ++ struct vkd3d_shader_dst_param *param; ++ const struct signature_element *e; ++ unsigned int i; ++ ++ for (i = 0; i < s->element_count; ++i) ++ { ++ e = &s->elements[i]; ++ ++ /* Do not check e->used_mask because in some cases it is zero for used elements. ++ * TODO: scan ahead for used I/O elements. */ ++ ++ if (e->sysval_semantic != VKD3D_SHADER_SV_NONE && e->sysval_semantic != VKD3D_SHADER_SV_TARGET) ++ { ++ ins = sm6_parser_add_instruction(sm6, siv_handler_idx); ++ param = &ins->declaration.register_semantic.reg; ++ ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); ++ } ++ else ++ { ++ ins = sm6_parser_add_instruction(sm6, handler_idx); ++ param = &ins->declaration.dst; ++ } ++ ++ *param = params[i]; ++ } ++} ++ ++static void sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) ++{ ++ sm6_parser_init_signature(sm6, output_signature, ++ (sm6->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL) ? 
VKD3DSPR_COLOROUT : VKD3DSPR_OUTPUT, ++ sm6->output_params); ++} ++ ++static void sm6_parser_emit_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) ++{ ++ sm6_parser_emit_signature(sm6, output_signature, VKD3DSIH_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT_SIV, sm6->output_params); ++} ++ + static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) + { + size_t i, count = sm6->function_count; +@@ -1838,6 +2150,258 @@ static struct sm6_block *sm6_block_create() + return block; + } + ++static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, struct sm6_block *code_block, ++ enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct vkd3d_shader_instruction *ins) ++{ ++ struct vkd3d_shader_src_param *src_param; ++ struct vkd3d_shader_dst_param *dst_param; ++ const struct shader_signature *signature; ++ unsigned int row_index, column_index; ++ const struct signature_element *e; ++ const struct sm6_value *value; ++ ++ row_index = sm6_value_get_constant_uint(operands[0]); ++ column_index = sm6_value_get_constant_uint(operands[2]); ++ ++ signature = &sm6->p.shader_desc.output_signature; ++ if (row_index >= signature->element_count) ++ { ++ WARN("Invalid row index %u.\n", row_index); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid output row index %u.", row_index); ++ return; ++ } ++ e = &signature->elements[row_index]; ++ ++ if (column_index >= VKD3D_VEC4_SIZE) ++ { ++ WARN("Invalid column index %u.\n", column_index); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid output column index %u.", column_index); ++ return; ++ } ++ ++ value = operands[3]; ++ if (!sm6_value_is_register(value)) ++ { ++ WARN("Source value is not a register.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Expected store operation source to be a register."); ++ return; ++ } ++ ++ 
shader_instruction_init(ins, VKD3DSIH_MOV); ++ ++ if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) ++ return; ++ dst_param_init_scalar(dst_param, column_index); ++ dst_param->reg = sm6->output_params[row_index].reg; ++ if (e->register_count > 1) ++ register_address_init(&dst_param->reg, operands[1], 0, sm6); ++ ++ if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ src_param_init_from_value(src_param, value); ++} ++ ++struct sm6_dx_opcode_info ++{ ++ const char ret_type; ++ const char *operand_info; ++ void (*handler)(struct sm6_parser *, struct sm6_block *, enum dx_intrinsic_opcode, ++ const struct sm6_value **, struct vkd3d_shader_instruction *); ++}; ++ ++/* ++ 8 -> int8 ++ i -> int32 ++ v -> void ++ o -> overloaded ++ */ ++static const struct sm6_dx_opcode_info sm6_dx_op_table[] = ++{ ++ [DX_STORE_OUTPUT ] = {'v', "ii8o", sm6_parser_emit_dx_store_output}, ++}; ++ ++static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_type *type, char info_type) ++{ ++ switch (info_type) ++ { ++ case 0: ++ FIXME("Invalid operand count.\n"); ++ return false; ++ case '8': ++ return sm6_type_is_i8(type); ++ case 'i': ++ return sm6_type_is_i32(type); ++ case 'v': ++ return !type; ++ case 'o': ++ /* TODO: some type checking may be possible */ ++ return true; ++ default: ++ FIXME("Unhandled operand code '%c'.\n", info_type); ++ return false; ++ } ++} ++ ++static bool sm6_parser_validate_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const char *name, ++ const struct sm6_value **operands, unsigned int operand_count, struct sm6_value *dst) ++{ ++ const struct sm6_dx_opcode_info *info; ++ unsigned int i; ++ ++ info = &sm6_dx_op_table[op]; ++ ++ if (!sm6_parser_validate_operand_type(sm6, dst->type, info->ret_type)) ++ { ++ WARN("Failed to validate return type for dx intrinsic id %u, '%s'.\n", op, name); ++ /* Return type validation failure is not so critical. We only need to set ++ * a data type for the SSA result. 
*/ ++ } ++ ++ for (i = 0; i < operand_count; ++i) ++ { ++ const struct sm6_value *value = operands[i]; ++ if (!sm6_value_is_register(value) || !sm6_parser_validate_operand_type(sm6, value->type, info->operand_info[i])) ++ { ++ WARN("Failed to validate operand %u for dx intrinsic id %u, '%s'.\n", i + 1, op, name); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Operand %u for call to dx intrinsic function '%s' is invalid.", i + 1, name); ++ return false; ++ } ++ } ++ if (info->operand_info[operand_count]) ++ { ++ WARN("Missing operands for dx intrinsic id %u, '%s'.\n", op, name); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, ++ "Call to dx intrinsic function '%s' has missing operands.", name); ++ return false; ++ } ++ ++ return true; ++} ++ ++static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shader_instruction *ins, ++ struct sm6_value *dst) ++{ ++ const struct sm6_type *type; ++ ++ ins->handler_idx = VKD3DSIH_NOP; ++ ++ if (!dst->type) ++ return; ++ ++ type = sm6_type_get_scalar_type(dst->type, 0); ++ shader_register_init(&dst->u.reg, VKD3DSPR_UNDEF, vkd3d_data_type_from_sm6_type(type), 0); ++ /* dst->is_undefined is not set here because it flags only explicitly undefined values. 
*/ ++} ++ ++static void sm6_parser_decode_dx_op(struct sm6_parser *sm6, struct sm6_block *code_block, enum dx_intrinsic_opcode op, ++ const char *name, const struct sm6_value **operands, unsigned int operand_count, ++ struct vkd3d_shader_instruction *ins, struct sm6_value *dst) ++{ ++ if (op >= ARRAY_SIZE(sm6_dx_op_table) || !sm6_dx_op_table[op].operand_info) ++ { ++ FIXME("Unhandled dx intrinsic function id %u, '%s'.\n", op, name); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNHANDLED_INTRINSIC, ++ "Call to intrinsic function %s is unhandled.", name); ++ sm6_parser_emit_unhandled(sm6, ins, dst); ++ return; ++ } ++ ++ if (sm6_parser_validate_dx_op(sm6, op, name, operands, operand_count, dst)) ++ sm6_dx_op_table[op].handler(sm6, code_block, op, operands, ins); ++ else ++ sm6_parser_emit_unhandled(sm6, ins, dst); ++} ++ ++static void sm6_parser_emit_call(struct sm6_parser *sm6, const struct dxil_record *record, ++ struct sm6_block *code_block, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) ++{ ++ const struct sm6_value *operands[DXIL_OP_MAX_OPERANDS]; ++ const struct sm6_value *fn_value, *op_value; ++ unsigned int i = 1, j, operand_count; ++ const struct sm6_type *type = NULL; ++ uint64_t call_conv; ++ ++ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) ++ return; ++ ++ /* TODO: load the 1-based attributes index from record->operands[0] and validate against attribute count. 
*/ ++ ++ if ((call_conv = record->operands[i++]) & CALL_CONV_FLAG_EXPLICIT_TYPE) ++ type = sm6_parser_get_type(sm6, record->operands[i++]); ++ if (call_conv &= ~CALL_CONV_FLAG_EXPLICIT_TYPE) ++ WARN("Ignoring calling convention %#"PRIx64".\n", call_conv); ++ ++ if (!(fn_value = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))) ++ return; ++ if (!sm6_value_is_function_dcl(fn_value)) ++ { ++ WARN("Function target value is not a function declaration.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Function call target value is not a function declaration."); ++ return; ++ } ++ ++ if (type && type != fn_value->type->u.pointer.type) ++ WARN("Explicit call type does not match function type.\n"); ++ type = fn_value->type->u.pointer.type; ++ ++ if (!sm6_type_is_void(type->u.function->ret_type)) ++ dst->type = type->u.function->ret_type; ++ ++ operand_count = type->u.function->param_count; ++ if (operand_count > ARRAY_SIZE(operands)) ++ { ++ WARN("Ignoring %zu operands.\n", operand_count - ARRAY_SIZE(operands)); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Ignoring %zu operands for function call.", operand_count - ARRAY_SIZE(operands)); ++ operand_count = ARRAY_SIZE(operands); ++ } ++ ++ for (j = 0; j < operand_count; ++j) ++ { ++ if (!(operands[j] = sm6_parser_get_value_by_ref(sm6, record, type->u.function->param_types[j], &i))) ++ return; ++ } ++ if ((j = record->operand_count - i)) ++ { ++ WARN("Ignoring %u operands beyond the function parameter list.\n", j); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Ignoring %u function call operands beyond the parameter list.", j); ++ } ++ ++ if (!fn_value->u.function.is_prototype) ++ { ++ FIXME("Unhandled call to local function.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Call to a local function is unsupported."); ++ return; ++ } ++ if 
(!sm6_value_is_dx_intrinsic_dcl(fn_value)) ++ WARN("External function is not a dx intrinsic.\n"); ++ ++ if (!operand_count) ++ { ++ WARN("Missing dx intrinsic function id.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, ++ "The id for a dx intrinsic function is missing."); ++ return; ++ } ++ ++ op_value = operands[0]; ++ if (!sm6_value_is_constant(op_value) || !sm6_type_is_integer(op_value->type)) ++ { ++ WARN("dx intrinsic function id is not a constant int.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Expected a constant integer dx intrinsic function id."); ++ return; ++ } ++ sm6_parser_decode_dx_op(sm6, code_block, register_get_uint_value(&op_value->u.reg), ++ fn_value->u.function.name, &operands[1], operand_count - 1, ins, dst); ++} ++ + static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record *record, + struct sm6_block *code_block, struct vkd3d_shader_instruction *ins) + { +@@ -1855,15 +2419,10 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + { + struct vkd3d_shader_instruction *ins; + const struct dxil_record *record; ++ bool ret_found, is_terminator; + struct sm6_block *code_block; + struct sm6_value *dst; + size_t i, block_idx; +- bool ret_found; +- enum +- { +- RESULT_VALUE, +- RESULT_TERMINATE, +- } result_type; + + if (sm6->function_count) + { +@@ -1907,10 +2466,20 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + } + code_block = function->blocks[0]; + ++ sm6->cur_max_value = function->value_count; ++ + for (i = 1, block_idx = 0, ret_found = false; i < block->record_count; ++i) + { + sm6->p.location.column = i; + ++ if (!code_block) ++ { ++ WARN("Invalid block count %zu.\n", function->block_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid block count %zu.", function->block_count); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ + 
/* block->record_count - 1 is the instruction count, but some instructions + * can emit >1 IR instruction, so extra may be used. */ + if (!vkd3d_array_reserve((void **)&code_block->instructions, &code_block->instruction_capacity, +@@ -1926,14 +2495,17 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + dst = sm6_parser_get_current_value(sm6); + dst->type = NULL; + dst->value_type = VALUE_TYPE_REG; +- result_type = RESULT_VALUE; ++ is_terminator = false; + + record = block->records[i]; + switch (record->code) + { ++ case FUNC_CODE_INST_CALL: ++ sm6_parser_emit_call(sm6, record, code_block, ins, dst); ++ break; + case FUNC_CODE_INST_RET: + sm6_parser_emit_ret(sm6, record, code_block, ins); +- result_type = RESULT_TERMINATE; ++ is_terminator = true; + ret_found = true; + break; + default: +@@ -1941,7 +2513,11 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + return VKD3D_ERROR_INVALID_SHADER; + } + +- if (result_type == RESULT_TERMINATE) ++ if (sm6->p.failed) ++ return VKD3D_ERROR; ++ assert(ins->handler_idx != VKD3DSIH_INVALID); ++ ++ if (is_terminator) + { + ++block_idx; + code_block = (block_idx < function->block_count) ? 
function->blocks[block_idx] : NULL; +@@ -1950,6 +2526,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; + else + assert(ins->handler_idx == VKD3DSIH_NOP); ++ + sm6->value_count += !!dst->type; + } + +@@ -1996,6 +2573,8 @@ static enum vkd3d_result sm6_parser_module_init(struct sm6_parser *sm6, const st + switch (block->id) + { + case CONSTANTS_BLOCK: ++ function = &sm6->functions[sm6->function_count]; ++ sm6->cur_max_value = function->value_count; + return sm6_parser_constants_init(sm6, block); + + case FUNCTION_BLOCK: +@@ -2103,6 +2682,7 @@ static const struct vkd3d_shader_parser_ops sm6_parser_ops = + static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, + const char *source_name, struct vkd3d_shader_message_context *message_context) + { ++ const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; + const struct vkd3d_shader_location location = {.source_name = source_name}; + uint32_t version_token, dxil_version, token_count, magic; + unsigned int chunk_offset, chunk_size; +@@ -2258,6 +2838,14 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + return ret; + } + ++ if (!(sm6->output_params = shader_parser_get_dst_params(&sm6->p, output_signature->element_count))) ++ { ++ ERR("Failed to allocate output parameters.\n"); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory allocating output parameters."); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ + function_count = dxil_block_compute_function_count(&sm6->root_block); + if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) + { +@@ -2288,6 +2876,8 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + return ret; + } + ++ sm6_parser_init_output_signature(sm6, output_signature); ++ + if 
((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) + { + if (ret == VKD3D_ERROR_OUT_OF_MEMORY) +@@ -2296,11 +2886,17 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + else if (ret == VKD3D_ERROR_INVALID_SHADER) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, + "DXIL module is invalid."); +- else +- vkd3d_unreachable(); + return ret; + } + ++ if (!sm6_parser_require_space(sm6, output_signature->element_count)) ++ { ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory emitting shader signature declarations."); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ sm6_parser_emit_output_signature(sm6, output_signature); ++ + for (i = 0; i < sm6->function_count; ++i) + { + if (!sm6_block_emit_instructions(sm6->functions[i].blocks[0], sm6)) +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 8b706e1e667..b8cf6813f67 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -72,6 +72,27 @@ void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, c + ctx->result = VKD3D_ERROR_NOT_IMPLEMENTED; + } + ++char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) 
++{ ++ struct vkd3d_string_buffer *string; ++ va_list args; ++ char *ret; ++ ++ if (!(string = hlsl_get_string_buffer(ctx))) ++ return NULL; ++ va_start(args, fmt); ++ if (vkd3d_string_buffer_vprintf(string, fmt, args) < 0) ++ { ++ va_end(args); ++ hlsl_release_string_buffer(ctx, string); ++ return NULL; ++ } ++ va_end(args); ++ ret = hlsl_strdup(ctx, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return ret; ++} ++ + bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var) + { + struct hlsl_scope *scope = ctx->cur_scope; +@@ -1039,11 +1060,10 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem + { + struct vkd3d_string_buffer *string; + struct hlsl_ir_var *var; +- static LONG counter; + + if (!(string = hlsl_get_string_buffer(ctx))) + return NULL; +- vkd3d_string_buffer_printf(string, "<%s-%u>", template, InterlockedIncrement(&counter)); ++ vkd3d_string_buffer_printf(string, "<%s-%u>", template, ctx->internal_name_counter++); + var = hlsl_new_synthetic_var_named(ctx, string->buffer, type, loc, true); + hlsl_release_string_buffer(ctx, string); + return var; +@@ -2968,6 +2988,16 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function + struct hlsl_ir_function *func; + struct rb_entry *func_entry; + ++ if (ctx->internal_func_name) ++ { ++ char *internal_name; ++ ++ if (!(internal_name = hlsl_strdup(ctx, ctx->internal_func_name))) ++ return; ++ vkd3d_free(name); ++ name = internal_name; ++ } ++ + func_entry = rb_get(&ctx->functions, name); + if (func_entry) + { +@@ -3499,3 +3529,44 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d + hlsl_ctx_cleanup(&ctx); + return ret; + } ++ ++struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl) ++{ ++ const struct hlsl_ir_function_decl *saved_cur_function = ctx->cur_function; ++ struct vkd3d_shader_code code = {.code = hlsl, .size = 
strlen(hlsl)}; ++ const char *saved_internal_func_name = ctx->internal_func_name; ++ struct vkd3d_string_buffer *internal_name; ++ struct hlsl_ir_function_decl *func; ++ void *saved_scanner = ctx->scanner; ++ int ret; ++ ++ TRACE("name %s, hlsl %s.\n", debugstr_a(name), debugstr_a(hlsl)); ++ ++ /* The actual name of the function is mangled with a unique prefix, both to ++ * allow defining multiple variants of a function with the same name, and to ++ * avoid polluting the user name space. */ ++ ++ if (!(internal_name = hlsl_get_string_buffer(ctx))) ++ return NULL; ++ vkd3d_string_buffer_printf(internal_name, "<%s-%u>", name, ctx->internal_name_counter++); ++ ++ /* Save and restore everything that matters. ++ * Note that saving the scope stack is hard, and shouldn't be necessary. */ ++ ++ ctx->scanner = NULL; ++ ctx->internal_func_name = internal_name->buffer; ++ ctx->cur_function = NULL; ++ ret = hlsl_lexer_compile(ctx, &code); ++ ctx->scanner = saved_scanner; ++ ctx->internal_func_name = saved_internal_func_name; ++ ctx->cur_function = saved_cur_function; ++ if (ret) ++ { ++ ERR("Failed to compile intrinsic, error %u.\n", ret); ++ hlsl_release_string_buffer(ctx, internal_name); ++ return NULL; ++ } ++ func = hlsl_get_func_decl(ctx, internal_name->buffer); ++ hlsl_release_string_buffer(ctx, internal_name); ++ return func; ++} +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 070fec74326..73b08ee3ea0 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -798,6 +798,9 @@ struct hlsl_ctx + /* Pointer to the current function; changes as the parser reads the code. */ + const struct hlsl_ir_function_decl *cur_function; + ++ /* Counter for generating unique internal variable names. */ ++ unsigned int internal_name_counter; ++ + /* Default matrix majority for matrix types. Can be set by a pragma within the HLSL source. 
*/ + unsigned int matrix_majority; + +@@ -834,6 +837,12 @@ struct hlsl_ctx + * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ + uint32_t thread_count[3]; + ++ /* In some cases we generate opcodes by parsing an HLSL function and then ++ * invoking it. If not NULL, this field is the name of the function that we ++ * are currently parsing, "mangled" with an internal prefix to avoid ++ * polluting the user namespace. */ ++ const char *internal_func_name; ++ + /* Whether the parser is inside a state block (effects' metadata) inside a variable declaration. */ + uint32_t in_state_block : 1; + /* Whether the numthreads() attribute has been provided in the entry-point function. */ +@@ -1069,6 +1078,8 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) + } + } + ++char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) VKD3D_PRINTF_FUNC(2, 3); ++ + const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op); + const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type); + const char *debug_hlsl_writemask(unsigned int writemask); +@@ -1258,6 +1269,8 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); + int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); + ++struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); ++ + int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); + + #endif +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 43ea4b4d038..161d1ab42c3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -2330,6 +2330,92 @@ static struct hlsl_ir_function_decl *find_function_call(struct 
hlsl_ctx *ctx, + return args.decl; + } + ++static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ ++ return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); ++} ++ ++static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ const struct parse_initializer *args, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *call; ++ unsigned int i; ++ ++ assert(args->args_count == func->parameters.count); ++ ++ for (i = 0; i < func->parameters.count; ++i) ++ { ++ struct hlsl_ir_var *param = func->parameters.vars[i]; ++ struct hlsl_ir_node *arg = args->args[i]; ++ ++ if (!hlsl_types_are_equal(arg->data_type, param->data_type)) ++ { ++ struct hlsl_ir_node *cast; ++ ++ if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) ++ return false; ++ args->args[i] = cast; ++ arg = cast; ++ } ++ ++ if (param->storage_modifiers & HLSL_STORAGE_IN) ++ { ++ struct hlsl_ir_node *store; ++ ++ if (!(store = hlsl_new_simple_store(ctx, param, arg))) ++ return false; ++ hlsl_block_add_instr(args->instrs, store); ++ } ++ } ++ ++ if (!(call = hlsl_new_call(ctx, func, loc))) ++ return false; ++ hlsl_block_add_instr(args->instrs, call); ++ ++ for (i = 0; i < func->parameters.count; ++i) ++ { ++ struct hlsl_ir_var *param = func->parameters.vars[i]; ++ struct hlsl_ir_node *arg = args->args[i]; ++ ++ if (param->storage_modifiers & HLSL_STORAGE_OUT) ++ { ++ struct hlsl_ir_load *load; ++ ++ if (arg->data_type->modifiers & HLSL_MODIFIER_CONST) ++ hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, ++ "Output argument to \"%s\" is const.", func->func->name); ++ ++ if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) ++ return false; ++ hlsl_block_add_instr(args->instrs, &load->node); ++ ++ if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) ++ return false; 
++ } ++ } ++ ++ if (func->return_var) ++ { ++ struct hlsl_ir_load *load; ++ ++ if (!(load = hlsl_new_var_load(ctx, func->return_var, loc))) ++ return false; ++ hlsl_block_add_instr(args->instrs, &load->node); ++ } ++ else ++ { ++ struct hlsl_ir_node *expr; ++ ++ if (!(expr = hlsl_new_void_expr(ctx, loc))) ++ return false; ++ hlsl_block_add_instr(args->instrs, expr); ++ } ++ ++ return true; ++} ++ + static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, + const struct parse_initializer *params, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) + { +@@ -2948,14 +3034,17 @@ static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, + static bool intrinsic_lit(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow, *load; +- struct hlsl_ir_node *n_l, *n_h, *m, *diffuse, *zero, *store, *init; +- struct hlsl_constant_value init_value; +- struct hlsl_ir_load *var_load; +- struct hlsl_deref var_deref; +- struct hlsl_type *ret_type; +- struct hlsl_ir_var *var; +- struct hlsl_block block; ++ struct hlsl_ir_function_decl *func; ++ ++ static const char body[] = ++ "float4 lit(float n_l, float n_h, float m)\n" ++ "{\n" ++ " float4 ret;\n" ++ " ret.xw = 1.0;\n" ++ " ret.y = max(n_l, 0);\n" ++ " ret.z = (n_l < 0 || n_h < 0) ? 
0 : pow(n_h, m);\n" ++ " return ret;\n" ++ "}"; + + if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR + || params->args[1]->data_type->class != HLSL_CLASS_SCALAR +@@ -2965,70 +3054,10 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, + return false; + } + +- if (!(n_l = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; +- +- if (!(n_h = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) +- return false; +- +- if (!(m = intrinsic_float_convert_arg(ctx, params, params->args[2], loc))) +- return false; +- +- ret_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); +- +- if (!(var = hlsl_new_synthetic_var(ctx, "lit", ret_type, loc))) +- return false; +- hlsl_init_simple_deref_from_var(&var_deref, var); +- +- init_value.u[0].f = 1.0f; +- init_value.u[1].f = 0.0f; +- init_value.u[2].f = 0.0f; +- init_value.u[3].f = 1.0f; +- if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, init); +- +- if (!(store = hlsl_new_simple_store(ctx, var, init))) +- return false; +- hlsl_block_add_instr(params->instrs, store); +- +- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, zero); +- +- /* Diffuse component. */ +- if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) ++ if (!(func = hlsl_compile_internal_function(ctx, "lit", body))) + return false; + +- if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) +- return false; +- hlsl_block_add_block(params->instrs, &block); +- +- /* Specular component. 
*/ +- if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) +- return false; +- +- if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_l, zero, loc))) +- return false; +- +- if (!(specular_or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) +- return false; +- +- if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) +- return false; +- +- if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, zero, specular_pow))) +- return false; +- +- if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) +- return false; +- hlsl_block_add_block(params->instrs, &block); +- +- if (!(var_load = hlsl_new_var_load(ctx, var, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, &var_load->node); +- +- return true; ++ return add_user_call(ctx, func, params, loc); + } + + static bool intrinsic_log(struct hlsl_ctx *ctx, +@@ -3336,58 +3365,29 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, + static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res, *one, *minus_two, *three; +- +- if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) +- return false; +- +- min_arg = params->args[0]; +- max_arg = params->args[1]; +- x_arg = params->args[2]; +- +- if (!(min_arg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, min_arg, loc))) +- return false; +- +- if (!(p_num = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, x_arg, min_arg, loc))) +- return false; +- +- if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, max_arg, min_arg, loc))) +- return false; +- +- if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, one); +- +- if (!(p_denom = 
add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) +- return false; +- +- if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p_num, p_denom, loc))) +- return false; +- +- if (!(p = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, p, loc))) +- return false; +- +- if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, minus_two); +- +- if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) +- return false; +- hlsl_block_add_instr(params->instrs, three); ++ struct hlsl_ir_function_decl *func; ++ struct hlsl_type *type; ++ char *body; + +- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) +- return false; ++ static const char template[] = ++ "%s smoothstep(%s low, %s high, %s x)\n" ++ "{\n" ++ " %s p = saturate((x - low) / (high - low));\n" ++ " return (p * p) * (3 - 2 * p);\n" ++ "}"; + +- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, three, res, loc))) ++ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + +- if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, p, loc))) ++ if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name, type->name))) + return false; +- +- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, res, loc))) ++ func = hlsl_compile_internal_function(ctx, "smoothstep", body); ++ vkd3d_free(body); ++ if (!func) + return false; + +- return true; ++ return add_user_call(ctx, func, params, loc); + } + + static bool intrinsic_sqrt(struct hlsl_ctx *ctx, +@@ -3478,6 +3478,12 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, + return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); + } + ++static bool intrinsic_texCUBE(struct hlsl_ctx 
*ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); ++} ++ + static bool intrinsic_transpose(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3648,6 +3654,7 @@ intrinsic_functions[] = + {"step", 2, true, intrinsic_step}, + {"tex2D", -1, false, intrinsic_tex2D}, + {"tex3D", -1, false, intrinsic_tex3D}, ++ {"texCUBE", -1, false, intrinsic_texCUBE}, + {"transpose", 1, true, intrinsic_transpose}, + {"trunc", 1, true, intrinsic_trunc}, + }; +@@ -3659,13 +3666,6 @@ static int intrinsic_function_name_compare(const void *a, const void *b) + return strcmp(a, func->name); + } + +-static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; +- +- return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); +-} +- + static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, + struct parse_initializer *args, const struct vkd3d_shader_location *loc) + { +@@ -3674,78 +3674,8 @@ static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, + + if ((decl = find_function_call(ctx, name, args, loc))) + { +- struct hlsl_ir_node *call; +- unsigned int i; +- +- assert(args->args_count == decl->parameters.count); +- +- for (i = 0; i < decl->parameters.count; ++i) +- { +- struct hlsl_ir_var *param = decl->parameters.vars[i]; +- struct hlsl_ir_node *arg = args->args[i]; +- +- if (!hlsl_types_are_equal(arg->data_type, param->data_type)) +- { +- struct hlsl_ir_node *cast; +- +- if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) +- goto fail; +- args->args[i] = cast; +- arg = cast; +- } +- +- if (param->storage_modifiers & HLSL_STORAGE_IN) +- { +- struct hlsl_ir_node *store; +- +- if (!(store = hlsl_new_simple_store(ctx, param, 
arg))) +- goto fail; +- hlsl_block_add_instr(args->instrs, store); +- } +- } +- +- if (!(call = hlsl_new_call(ctx, decl, loc))) ++ if (!add_user_call(ctx, decl, args, loc)) + goto fail; +- hlsl_block_add_instr(args->instrs, call); +- +- for (i = 0; i < decl->parameters.count; ++i) +- { +- struct hlsl_ir_var *param = decl->parameters.vars[i]; +- struct hlsl_ir_node *arg = args->args[i]; +- +- if (param->storage_modifiers & HLSL_STORAGE_OUT) +- { +- struct hlsl_ir_load *load; +- +- if (arg->data_type->modifiers & HLSL_MODIFIER_CONST) +- hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, +- "Output argument to \"%s\" is const.", decl->func->name); +- +- if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) +- goto fail; +- hlsl_block_add_instr(args->instrs, &load->node); +- +- if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) +- goto fail; +- } +- } +- +- if (decl->return_var) +- { +- struct hlsl_ir_load *load; +- +- if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) +- goto fail; +- hlsl_block_add_instr(args->instrs, &load->node); +- } +- else +- { +- struct hlsl_ir_node *expr; +- +- if (!(expr = hlsl_new_void_expr(ctx, loc))) +- goto fail; +- hlsl_block_add_instr(args->instrs, expr); +- } + } + else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), + sizeof(*intrinsic_functions), intrinsic_function_name_compare))) +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index bae8e5f9a5f..710d2908166 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -163,10 +163,10 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der + * work. 
*/ + static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *temp) + { +- struct vkd3d_string_buffer *name; + struct hlsl_ir_var *uniform; + struct hlsl_ir_node *store; + struct hlsl_ir_load *load; ++ char *new_name; + + /* Use the synthetic name for the temp, rather than the uniform, so that we + * can write the uniform name into the shader reflection data. */ +@@ -180,11 +180,9 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, + uniform->is_param = temp->is_param; + uniform->buffer = temp->buffer; + +- if (!(name = hlsl_get_string_buffer(ctx))) ++ if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) + return; +- vkd3d_string_buffer_printf(name, "", temp->name); +- temp->name = hlsl_strdup(ctx, name->buffer); +- hlsl_release_string_buffer(ctx, name); ++ temp->name = new_name; + + if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) + return; +@@ -235,16 +233,15 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + uint32_t index, bool output, const struct vkd3d_shader_location *loc) + { + struct hlsl_semantic new_semantic; +- struct vkd3d_string_buffer *name; + struct hlsl_ir_var *ext_var; ++ char *new_name; + +- if (!(name = hlsl_get_string_buffer(ctx))) ++ if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? "output" : "input", semantic->name, index))) + return NULL; +- vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? 
"output" : "input", semantic->name, index); + + LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (!ascii_strcasecmp(ext_var->name, name->buffer)) ++ if (!ascii_strcasecmp(ext_var->name, new_name)) + { + if (output) + { +@@ -271,25 +268,23 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + } + } + +- hlsl_release_string_buffer(ctx, name); ++ vkd3d_free(new_name); + return ext_var; + } + } + + if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) + { +- hlsl_release_string_buffer(ctx, name); ++ vkd3d_free(new_name); + return NULL; + } + new_semantic.index = index; +- if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), type, loc, &new_semantic, +- modifiers, NULL))) ++ if (!(ext_var = hlsl_new_var(ctx, new_name, type, loc, &new_semantic, modifiers, NULL))) + { +- hlsl_release_string_buffer(ctx, name); ++ vkd3d_free(new_name); + hlsl_cleanup_semantic(&new_semantic); + return NULL; + } +- hlsl_release_string_buffer(ctx, name); + if (output) + ext_var->is_output_semantic = 1; + else +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 705905f7888..6d7c89653e3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -296,7 +296,7 @@ static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normali + return VKD3D_OK; + } + +-static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, ++void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) + { + reg->type = reg_type; +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 9b3084538ba..f93960d6d54 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -4792,13 +4792,16 @@ static bool 
is_dual_source_blending(const struct spirv_compiler *compiler) + + static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask) + { ++ unsigned int write_mask; ++ + if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) + { + FIXME("Invalid semantic index %u for clip/cull distance.\n", e->semantic_index); + return; + } + +- *mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); ++ write_mask = e->mask >> vkd3d_write_mask_get_component_idx(e->mask); ++ *mask |= (write_mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); + } + + /* Emits arrayed SPIR-V built-in variables. */ +@@ -4962,7 +4965,6 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + component_type = builtin->component_type; + if (!builtin->spirv_array_size) + output_component_count = builtin->component_count; +- component_idx = 0; + } + else + { +@@ -4976,14 +4978,9 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + || needs_private_io_variable(builtin)) + { + use_private_variable = true; +- reg_write_mask = write_mask; +- } +- else +- { +- component_idx = vkd3d_write_mask_get_component_idx(write_mask); +- reg_write_mask = write_mask >> component_idx; + } + ++ reg_write_mask = write_mask >> component_idx; + vkd3d_symbol_make_register(&reg_symbol, reg); + + if (rb_get(&compiler->symbol_table, &reg_symbol)) +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index 2bc8613f2ef..a70894a160d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -1771,9 +1771,6 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, + { + void *params; + +- if (!count) +- return NULL; +- + if (count > allocator->count - allocator->index) + { + struct vkd3d_shader_param_node *next = 
shader_param_allocator_node_create(allocator); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 84614a4eb79..eab1c730ae9 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -173,12 +173,14 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL = 8009, + VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID = 8010, + VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE = 8011, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND = 8012, + + VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER = 8300, + VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE = 8301, + VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH = 8302, + VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH = 8303, + VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS = 8304, ++ VKD3D_SHADER_WARNING_DXIL_UNHANDLED_INTRINSIC = 8305, + + VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, + }; +@@ -556,6 +558,11 @@ enum vkd3d_data_type + VKD3D_DATA_UINT8, + }; + ++static inline bool data_type_is_integer(enum vkd3d_data_type data_type) ++{ ++ return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT; ++} ++ + enum vkd3d_immconst_type + { + VKD3D_IMMCONST_SCALAR, +@@ -734,6 +741,9 @@ struct vkd3d_shader_register + } u; + }; + ++void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, ++ enum vkd3d_data_type data_type, unsigned int idx_count); ++ + struct vkd3d_shader_dst_param + { + struct vkd3d_shader_register reg; +-- +2.40.1 +